diff --git a/crypto_sign/dilithium/dilithium2/META.yml b/crypto_sign/dilithium/dilithium2/META.yml new file mode 100644 index 00000000..0249aac7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/META.yml @@ -0,0 +1,31 @@ +name: Dilithium2 +type: signature +claimed-nist-level: 2 +length-public-key: 1312 +length-secret-key: 2544 +length-signature: 2420 +nistkat-sha256: 9c636528bf81c03df6ad8f9471cb1b4d9097d66af825d4f60b7ff0d941ca4d37 +testvectors-sha256: 166fc2481358d5a1b7a528b30af36ad069b049b5755cf63b843ce0f25f35aeb6 +principal-submitters: + - Vadim Lyubashevsky +auxiliary-submitters: + - Léo Ducas + - Eike Kiltz + - Tancrède Lepoint + - Peter Schwabe + - Gregor Seiler + - Damien Stehlé +implementations: + - name: clean + version: https://github.com/pq-crystals/dilithium/commit/1e63a1e880401166f105ab44ec67464c9714a315 via https://github.com/jschanck/package-pqclean/tree/b158a891/dilithium + - name: avx2 + version: https://github.com/pq-crystals/dilithium/commit/1e63a1e880401166f105ab44ec67464c9714a315 via https://github.com/jschanck/package-pqclean/tree/b158a891/dilithium + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - aes + - avx2 + - popcnt diff --git a/crypto_sign/dilithium/dilithium2/avx2/LICENSE b/crypto_sign/dilithium/dilithium2/avx2/LICENSE new file mode 100644 index 00000000..08473af7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/LICENSE @@ -0,0 +1,5 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/crypto_sign/dilithium/dilithium2/avx2/align.h b/crypto_sign/dilithium/dilithium2/avx2/align.h new file mode 100644 index 00000000..1e74b915 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/align.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_ALIGN_H +#define PQCLEAN_DILITHIUM2_AVX2_ALIGN_H + +#include <immintrin.h> +#include <stdint.h> + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[((N)+31)/32]; \ + } + +#define ALIGNED_INT32(N) \ + union { \ + int32_t coeffs[N]; \ + __m256i vec[((N)+7)/8]; \ + } + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/api.h b/crypto_sign/dilithium/dilithium2/avx2/api.h new file mode 100644 index 00000000..7eaf76b3 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/api.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_API_H +#define PQCLEAN_DILITHIUM2_AVX2_API_H + +#include <stddef.h> +#include <stdint.h> + +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES 1312 +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES 2544 +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES 2420 +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_ALGNAME "Dilithium2" + + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/cdecl.h b/crypto_sign/dilithium/dilithium2/avx2/cdecl.h new file mode 100644 index 00000000..d23e7646 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/cdecl.h @@ -0,0 +1,24 @@ +#ifndef 
PQCLEAN_DILITHIUM2_AVX2_CDECL_H +#define PQCLEAN_DILITHIUM2_AVX2_CDECL_H + + + +#define _8XQ 0 +#define _8XQINV 8 +#define _8XDIV_QINV 16 +#define _8XDIV 24 +#define _ZETAS_QINV 32 +#define _ZETAS 328 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found (nttconsts.c). + * + * This define helps us get around this + */ + +#define _cdecl(s) _##s +#define cdecl(s) s + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/consts.c b/crypto_sign/dilithium/dilithium2/avx2/consts.c new file mode 100644 index 00000000..7190fa5c --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/consts.c @@ -0,0 +1,101 @@ +#include "consts.h" +#include "params.h" +#include <stdint.h> + +#define QINV 58728449 // q^(-1) mod 2^32 +#define MONT (-4186625) // 2^32 mod q +#define DIV 41978 // mont^2/256 +#define DIV_QINV (-8395782) + +const qdata_t PQCLEAN_DILITHIUM2_AVX2_qdata = {{ +//#define _8XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, + +//#define _8XQINV 8 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +//#define _8XDIV_QINV 16 + DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, + +//#define _8XDIV 24 + DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV, + +//#define _ZETAS_QINV 32 + -151046689, 1830765815, -1929875198, -1927777021, 1640767044, 1477910808, 1612161320, 1640734244, + 308362795, 308362795, 308362795, 308362795, -1815525077, -1815525077, -1815525077, -1815525077, + -1374673747, -1374673747, -1374673747, -1374673747, -1091570561, -1091570561, -1091570561, -1091570561, + -1929495947, -1929495947, -1929495947, -1929495947, 515185417, 515185417, 515185417, 515185417, + -285697463, -285697463, -285697463, -285697463, 625853735, 625853735, 625853735, 625853735, + 1727305304, 1727305304, 2082316400, 2082316400, -1364982364, -1364982364, 858240904, 858240904, + 1806278032, 1806278032, 222489248, 222489248, 
-346752664, -346752664, 684667771, 684667771, + 1654287830, 1654287830, -878576921, -878576921, -1257667337, -1257667337, -748618600, -748618600, + 329347125, 329347125, 1837364258, 1837364258, -1443016191, -1443016191, -1170414139, -1170414139, + -1846138265, -1631226336, -1404529459, 1838055109, 1594295555, -1076973524, -1898723372, -594436433, + -202001019, -475984260, -561427818, 1797021249, -1061813248, 2059733581, -1661512036, -1104976547, + -1750224323, -901666090, 418987550, 1831915353, -1925356481, 992097815, 879957084, 2024403852, + 1484874664, -1636082790, -285388938, -1983539117, -1495136972, -950076368, -1714807468, -952438995, + -1574918427, 1350681039, -2143979939, 1599739335, -1285853323, -993005454, -1440787840, 568627424, + -783134478, -588790216, 289871779, -1262003603, 2135294594, -1018755525, -889861155, 1665705315, + 1321868265, 1225434135, -1784632064, 666258756, 675310538, -1555941048, -1999506068, -1499481951, + -695180180, -1375177022, 1777179795, 334803717, -178766299, -518252220, 1957047970, 1146323031, + -654783359, -1974159335, 1651689966, 140455867, -1039411342, 1955560694, 1529189038, -2131021878, + -247357819, 1518161567, -86965173, 1708872713, 1787797779, 1638590967, -120646188, -1669960606, + -916321552, 1155548552, 2143745726, 1210558298, -1261461890, -318346816, 628664287, -1729304568, + 1422575624, 1424130038, -1185330464, 235321234, 168022240, 1206536194, 985155484, -894060583, + -898413, -1363460238, -605900043, 2027833504, 14253662, 1014493059, 863641633, 1819892093, + 2124962073, -1223601433, -1920467227, -1637785316, -1536588520, 694382729, 235104446, -1045062172, + 831969619, -300448763, 756955444, -260312805, 1554794072, 1339088280, -2040058690, -853476187, + -2047270596, -1723816713, -1591599803, -440824168, 1119856484, 1544891539, 155290192, -973777462, + 991903578, 912367099, -44694137, 1176904444, -421552614, -818371958, 1747917558, -325927722, + 908452108, 1851023419, -1176751719, -1354528380, -72690498, -314284737, 
985022747, 963438279, + -1078959975, 604552167, -1021949428, 608791570, 173440395, -2126092136, -1316619236, -1039370342, + 6087993, -110126092, 565464272, -1758099917, -1600929361, 879867909, -1809756372, 400711272, + 1363007700, 30313375, -326425360, 1683520342, -517299994, 2027935492, -1372618620, 128353682, + -1123881663, 137583815, -635454918, -642772911, 45766801, 671509323, -2070602178, 419615363, + 1216882040, -270590488, -1276805128, 371462360, -1357098057, -384158533, 827959816, -596344473, + 702390549, -279505433, -260424530, -71875110, -1208667171, -1499603926, 2036925262, -540420426, + 746144248, -1420958686, 2032221021, 1904936414, 1257750362, 1926727420, 1931587462, 1258381762, + 885133339, 1629985060, 1967222129, 6363718, -1287922800, 1136965286, 1779436847, 1116720494, + 1042326957, 1405999311, 713994583, 940195359, -1542497137, 2061661095, -883155599, 1726753853, + -1547952704, 394851342, 283780712, 776003547, 1123958025, 201262505, 1934038751, 374860238, + +//#define _ZETAS 328 + -3975713, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 1826347, 1826347, 1826347, 2353451, 2353451, 2353451, 2353451, + -359251, -359251, -359251, -359251, -2091905, -2091905, -2091905, -2091905, + 3119733, 3119733, 3119733, 3119733, -2884855, -2884855, -2884855, -2884855, + 3111497, 3111497, 3111497, 3111497, 2680103, 2680103, 2680103, 2680103, + 2725464, 2725464, 1024112, 1024112, -1079900, -1079900, 3585928, 3585928, + -549488, -549488, -1119584, -1119584, 2619752, 2619752, -2108549, -2108549, + -2118186, -2118186, -3859737, -3859737, -1399561, -1399561, -3277672, -3277672, + 1757237, 1757237, -19422, -19422, 4010497, 4010497, 280005, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + 
-3930395, -3677745, -1452451, 2176455, -1257611, -4083598, -3190144, -3632928, + 3412210, 2147896, -2967645, -411027, -671102, -22981, -381987, 1852771, + -3343383, 508951, 44288, 904516, -3724342, 1653064, 2389356, 759969, + 189548, 3159746, -2409325, 1315589, 1285669, -812732, -3019102, -3628969, + -1528703, -3041255, 3475950, -1585221, 1939314, -1000202, -3157330, 126922, + -983419, 2715295, -3693493, -2477047, -1228525, -1308169, 1349076, -1430430, + 264944, 3097992, -1100098, 3958618, -8578, -3249728, -210977, -1316856, + -3553272, -1851402, -177440, 1341330, -1584928, -1439742, -3881060, 3839961, + 2091667, -3342478, 266997, -3520352, 900702, 495491, -655327, -3556995, + 342297, 3437287, 2842341, 4055324, -3767016, -2994039, -1333058, -451100, + -1279661, 1500165, -542412, -2584293, -2013608, 1957272, -3183426, 810149, + -3038916, 2213111, -426683, -1667432, -2939036, 183443, -554416, 3937738, + 3407706, 2244091, 2434439, -3759364, 1859098, -1613174, -3122442, -525098, + 286988, -3342277, 2691481, 1247620, 1250494, 1869119, 1237275, 1312455, + 1917081, 777191, -2831860, -3724270, 2432395, 3369112, 162844, 1652634, + 3523897, -975884, 1723600, -1104333, -2235985, -976891, 3919660, 1400424, + 2316500, -2446433, -1235728, -1197226, 909542, -43260, 2031748, -768622, + -2437823, 1735879, -2590150, 2486353, 2635921, 1903435, -3318210, 3306115, + -2546312, 2235880, -1671176, 594136, 2454455, 185531, 1616392, -3694233, + 3866901, 1717735, -1803090, -260646, -420899, 1612842, -48306, -846154, + 3817976, -3562462, 3513181, -3193378, 819034, -522500, 3207046, -3595838, + 4108315, 203044, 1265009, 1595974, -3548272, -1050970, -1430225, -1962642, + -1374803, 3406031, -1846953, -3776993, -164721, -1207385, 3014001, -1799107, + 269760, 472078, 1910376, -3833893, -2286327, -3545687, -1362209, 1976782, + } +}; diff --git a/crypto_sign/dilithium/dilithium2/avx2/consts.h b/crypto_sign/dilithium/dilithium2/avx2/consts.h new file mode 100644 index 00000000..44a50460 --- /dev/null 
+++ b/crypto_sign/dilithium/dilithium2/avx2/consts.h @@ -0,0 +1,10 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_CONSTS_H +#define PQCLEAN_DILITHIUM2_AVX2_CONSTS_H +#include "align.h" +#include "cdecl.h" + + +typedef ALIGNED_INT32(624) qdata_t; +extern const qdata_t PQCLEAN_DILITHIUM2_AVX2_qdata; + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/f1600x4.S b/crypto_sign/dilithium/dilithium2/avx2/f1600x4.S new file mode 100644 index 00000000..76f89ca6 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/f1600x4.S @@ -0,0 +1,909 @@ +/* Taken from Bas Westerbaan's new 4-way SHAKE implementation + * for Sphincs+ (https://github.com/sphincs/sphincsplus/pull/14/), + * but uses vpshufb for byte-granular rotations as in the Keccak Code Package. */ + +#include "cdecl.h" + +.data +.p2align 5 +rho8: +.byte 7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14,7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14 +rho56: +.byte 1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8,1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8 + +.text +.global cdecl(PQCLEAN_DILITHIUM2_AVX2_f1600x4) +.global _cdecl(PQCLEAN_DILITHIUM2_AVX2_f1600x4) +cdecl(PQCLEAN_DILITHIUM2_AVX2_f1600x4): +_cdecl(PQCLEAN_DILITHIUM2_AVX2_f1600x4): +vmovdqa rho8(%rip), %ymm0 +movq $6, %rax +looptop: +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq 
$1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 192(%rdi), %ymm4, %ymm9 +vpxor 384(%rdi), %ymm3, %ymm10 +vpxor 576(%rdi), %ymm2, %ymm11 +vpxor 768(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 0(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 96(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 320(%rdi), %ymm5, %ymm10 +vpxor 512(%rdi), %ymm4, %ymm11 +vpxor 704(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, 
%ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 32(%rdi), %ymm4, %ymm8 +vpxor 224(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 608(%rdi), %ymm1, %ymm11 +vpxor 640(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 128(%rdi), %ymm1, %ymm8 +vpxor 160(%rdi), %ymm5, %ymm9 +vpxor 352(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 736(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 
+vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 64(%rdi), %ymm3, %ymm8 +vpxor 256(%rdi), %ymm2, %ymm9 +vpxor 448(%rdi), %ymm1, %ymm10 +vpxor 480(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 448(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 
480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 512(%rdi), %ymm4, %ymm9 +vpxor 224(%rdi), %ymm3, %ymm10 +vpxor 736(%rdi), %ymm2, %ymm11 +vpxor 448(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 8(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 576(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 640(%rdi), %ymm5, %ymm10 +vpxor 352(%rdi), %ymm4, %ymm11 +vpxor 64(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, 
%ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 192(%rdi), %ymm4, %ymm8 +vpxor 704(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 128(%rdi), %ymm1, %ymm11 +vpxor 480(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 768(%rdi), %ymm1, %ymm8 +vpxor 320(%rdi), %ymm5, %ymm9 +vpxor 32(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 256(%rdi), %ymm2, %ymm12 
+vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 384(%rdi), %ymm3, %ymm8 +vpxor 96(%rdi), %ymm2, %ymm9 +vpxor 608(%rdi), %ymm1, %ymm10 +vpxor 160(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 608(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 
160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 352(%rdi), %ymm4, %ymm9 +vpxor 704(%rdi), %ymm3, %ymm10 +vpxor 256(%rdi), %ymm2, %ymm11 +vpxor 608(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 16(%rsi), %ymm8 +vpxor %ymm8, 
%ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 736(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 480(%rdi), %ymm5, %ymm10 +vpxor 32(%rdi), %ymm4, %ymm11 +vpxor 384(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 512(%rdi), %ymm4, %ymm8 +vpxor 64(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 768(%rdi), %ymm1, %ymm11 +vpxor 160(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, 
%ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 448(%rdi), %ymm1, %ymm8 +vpxor 640(%rdi), %ymm5, %ymm9 +vpxor 192(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 96(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 224(%rdi), %ymm3, %ymm8 +vpxor 576(%rdi), %ymm2, %ymm9 +vpxor 128(%rdi), %ymm1, %ymm10 +vpxor 320(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, 
%ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 128(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 32(%rdi), %ymm4, %ymm9 +vpxor 64(%rdi), %ymm3, %ymm10 +vpxor 96(%rdi), %ymm2, %ymm11 +vpxor 128(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn 
%ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 24(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 256(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 160(%rdi), %ymm5, %ymm10 +vpxor 192(%rdi), %ymm4, %ymm11 +vpxor 224(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 352(%rdi), %ymm4, %ymm8 +vpxor 384(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 448(%rdi), %ymm1, %ymm11 +vpxor 320(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, 
%ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 608(%rdi), %ymm1, %ymm8 +vpxor 480(%rdi), %ymm5, %ymm9 +vpxor 512(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 576(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 704(%rdi), %ymm3, %ymm8 +vpxor 736(%rdi), %ymm2, %ymm9 +vpxor 768(%rdi), %ymm1, %ymm10 +vpxor 640(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, 
%ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 768(%rdi) +addq $32, %rsi +subq $1, %rax +jnz looptop +ret diff --git a/crypto_sign/dilithium/dilithium2/avx2/fips202x4.c b/crypto_sign/dilithium/dilithium2/avx2/fips202x4.c new file mode 100644 index 00000000..53e72945 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/fips202x4.c @@ -0,0 +1,219 @@ +#include "fips202.h" +#include "fips202x4.h" +#include +#include +#include +#include + +#define NROUNDS 24 + +/* Keccak round constants: NROUNDS 64-bit words, passed as the rc argument of PQCLEAN_DILITHIUM2_AVX2_f1600x4 by the absorb and squeeze helpers in this file. */ +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + (uint64_t)0x0000000000000001ULL, + (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, + (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, + (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, + (uint64_t)0x000000008000000aULL, + (uint64_t)0x000000008000808bULL, + (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, + (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, + (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, + (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008008ULL +}; +/* Absorb four inputs in0..in3 of inlen bytes each into the 4-way state s, which is zeroed first (so this is one-shot, not incremental). Input k goes to 64-bit lane k of every __m256i word, in0 in the lowest lane. r is the rate in bytes; p is the padding byte XORed in directly after the last input byte, and bit 63 of word r/8 - 1 is set as the closing padding bit. NOTE(review): a trailing partial word is fetched with a full 8-byte gather, so up to 7 bytes past the end of each input may be read; confirm that callers tolerate this. */ +static void keccakx4_absorb_once(__m256i s[25], + unsigned int r, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2,
+ const uint8_t *in3, + size_t inlen, + uint8_t p) { + size_t i; + uint64_t pos = 0; + __m256i t, idx; + /* Start from the all-zero state. */ + for (i = 0; i < 25; ++i) { + s[i] = _mm256_setzero_si256(); + } + /* Lane k of idx holds the address of input k; the gathers below pass the running byte offset pos as the base and these addresses as indices with scale 1. */ + idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0); + while (inlen >= r) { + for (i = 0; i < r / 8; ++i) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + s[i] = _mm256_xor_si256(s[i], t); + pos += 8; + } + inlen -= r; + /* A full block of r bytes has been XORed into the state: permute. */ + PQCLEAN_DILITHIUM2_AVX2_f1600x4(s, KeccakF_RoundConstants); + } + /* Remaining whole 8-byte words of the last, partial block. */ + for (i = 0; i < inlen / 8; ++i) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + s[i] = _mm256_xor_si256(s[i], t); + pos += 8; + } + inlen -= 8 * i; + /* Fewer than 8 bytes left: load one more word and keep only its low inlen bytes. NOTE(review): the gather still reads 8 bytes per input although only inlen remain. */ + if (inlen) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + idx = _mm256_set1_epi64x((long long)((1ULL << (8 * inlen)) - 1)); + t = _mm256_and_si256(t, idx); + s[i] = _mm256_xor_si256(s[i], t); + } + /* Padding: byte p directly after the message, then the top bit of the last word of the rate. */ + t = _mm256_set1_epi64x((uint64_t)p << 8 * inlen); + s[i] = _mm256_xor_si256(s[i], t); + t = _mm256_set1_epi64x((long long)(1ULL << 63)); + s[r / 8 - 1] = _mm256_xor_si256(s[r / 8 - 1], t); +} +/* Squeeze nblocks blocks of r bytes from the 4-way state s. Each block first runs the permutation, then writes 64-bit lane k of every rate word to outk (lane 0 to out0 ... lane 3 to out3). The output pointers are advanced locally by r per block. */ +static void keccakx4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + unsigned int r, + __m256i s[25]) { + unsigned int i; + __m128d t; + + while (nblocks > 0) { + PQCLEAN_DILITHIUM2_AVX2_f1600x4(s, KeccakF_RoundConstants); + for (i = 0; i < r / 8; ++i) { + t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i])); + _mm_storel_pd((double *)&out0[8 * i], t); + _mm_storeh_pd((double *)&out1[8 * i], t); + t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1)); + _mm_storel_pd((double *)&out2[8 * i], t); + _mm_storeh_pd((double *)&out3[8 * i], t); + } + + out0 += r; + out1 += r; + out2 += r; + out3 += r; + --nblocks; + } +} +/* 4-way absorb with rate SHAKE128_RATE and padding byte 0x1F; resets state->s. */ +void PQCLEAN_DILITHIUM2_AVX2_shake128x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + keccakx4_absorb_once(state->s, SHAKE128_RATE, in0,
in1, in2, in3, inlen, 0x1F); +} +/* Squeeze nblocks blocks of SHAKE128_RATE bytes per lane from state into out0..out3. */ +void PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) { + keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, state->s); +} +/* 4-way absorb with rate SHAKE256_RATE and padding byte 0x1F; resets state->s. */ +void PQCLEAN_DILITHIUM2_AVX2_shake256x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + keccakx4_absorb_once(state->s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F); +} +/* Squeeze nblocks blocks of SHAKE256_RATE bytes per lane from state into out0..out3. */ +void PQCLEAN_DILITHIUM2_AVX2_shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) { + keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, state->s); +} +/* One-shot 4-way SHAKE128: absorb four inlen-byte inputs and write outlen bytes to each of out0..out3. Whole SHAKE128_RATE blocks are squeezed directly into the outputs; a final partial block goes through the stack buffer t and only the bytes still wanted are copied out. */ +void PQCLEAN_DILITHIUM2_AVX2_shake128x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + unsigned int i; + size_t nblocks = outlen / SHAKE128_RATE; + uint8_t t[4][SHAKE128_RATE]; + keccakx4_state state; + /* Absorb everything, then squeeze all whole blocks straight into the outputs. */ + PQCLEAN_DILITHIUM2_AVX2_shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen); + PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); + /* Step past the bytes already written. */ + out0 += nblocks * SHAKE128_RATE; + out1 += nblocks * SHAKE128_RATE; + out2 += nblocks * SHAKE128_RATE; + out3 += nblocks * SHAKE128_RATE; + outlen -= nblocks * SHAKE128_RATE; + /* Trailing partial block: squeeze one block into t and copy only outlen bytes of it. */ + if (outlen) { + PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); + for (i = 0; i < outlen; ++i) { + out0[i] = t[0][i]; + out1[i] = t[1][i]; + out2[i] = t[2][i]; + out3[i] = t[3][i]; + } + } +} +/* One-shot 4-way SHAKE256: same structure as the SHAKE128 variant above, with rate SHAKE256_RATE. */ +void PQCLEAN_DILITHIUM2_AVX2_shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + unsigned int
i; + size_t nblocks = outlen / SHAKE256_RATE; + uint8_t t[4][SHAKE256_RATE]; + keccakx4_state state; + /* Absorb everything, then squeeze all whole blocks straight into the outputs. */ + PQCLEAN_DILITHIUM2_AVX2_shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen); + PQCLEAN_DILITHIUM2_AVX2_shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); + /* Step past the bytes already written. */ + out0 += nblocks * SHAKE256_RATE; + out1 += nblocks * SHAKE256_RATE; + out2 += nblocks * SHAKE256_RATE; + out3 += nblocks * SHAKE256_RATE; + outlen -= nblocks * SHAKE256_RATE; + /* Trailing partial block: squeeze one block into t and copy only outlen bytes of it. */ + if (outlen) { + PQCLEAN_DILITHIUM2_AVX2_shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); + for (i = 0; i < outlen; ++i) { + out0[i] = t[0][i]; + out1[i] = t[1][i]; + out2[i] = t[2][i]; + out3[i] = t[3][i]; + } + } +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/fips202x4.h b/crypto_sign/dilithium/dilithium2/avx2/fips202x4.h new file mode 100644 index 00000000..9026106b --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/fips202x4.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_FIPS202X4_H +#define PQCLEAN_DILITHIUM2_AVX2_FIPS202X4_H + +#include +#include +#include +/* Four interleaved Keccak states: s[i] holds 64-bit word i of each state, one state per lane. */ +typedef struct { + __m256i s[25]; +} keccakx4_state; +/* Called by the absorb and squeeze routines with the 25-word 4-way state and the round-constant table; presumably the Keccak permutation applied to all four lanes (body not declared here). */ +void PQCLEAN_DILITHIUM2_AVX2_f1600x4(__m256i *s, const uint64_t *rc); +/* The absorb_once functions reset the state before absorbing; the four inputs must all be inlen bytes long. */ +void PQCLEAN_DILITHIUM2_AVX2_shake128x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); +/* The squeezeblocks functions write nblocks whole rate-sized blocks to each output. */ +void PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +void PQCLEAN_DILITHIUM2_AVX2_shake256x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM2_AVX2_shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); +/* One-shot variants: absorb four inlen-byte inputs and produce outlen bytes per output. */ +void PQCLEAN_DILITHIUM2_AVX2_shake128x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t
*out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM2_AVX2_shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/invntt.S b/crypto_sign/dilithium/dilithium2/avx2/invntt.S new file mode 100644 index 00000000..364559b7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/invntt.S @@ -0,0 +1,240 @@ +#include "cdecl.h" +.include "shuffle.inc" +/* Inverse-NTT butterfly on ymm registers l and h (32-bit lanes): l becomes l plus h, and h becomes the reduced product of (h minus l) with a twiddle factor. zl0/zl1 are the multipliers callers load from the _ZETAS_QINV table (for even and odd lanes), zh0/zh1 those from _ZETAS; defaults are ymm1 and ymm2. ymm0 must hold the constant loaded from _8XQ at function entry. Clobbers ymm12, ymm13 and ymm14. */ +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpsubd %ymm\l,%ymm\h,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l +/* ymm12 = h minus l; multiply its even lanes, and its odd lanes moved down by vmovshdup, with the QINV-table factor */ +vpmuldq %ymm\zl0,%ymm12,%ymm13 +vmovshdup %ymm12,%ymm\h +vpmuldq %ymm\zl1,%ymm\h,%ymm14 +/* full 64-bit products with the twiddle itself */ +vpmuldq %ymm\zh0,%ymm12,%ymm12 +vpmuldq %ymm\zh1,%ymm\h,%ymm\h +/* multiply the low halves from the first step by the ymm0 constant */ +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +/* subtract; the wanted result is the high 32 bits of each 64-bit lane */ +vpsubd %ymm13,%ymm12,%ymm12 +vpsubd %ymm14,%ymm\h,%ymm\h +/* move the even-lane results down and merge them with the odd-lane results (blend mask 0xAA keeps odd dwords of h) */ +vmovshdup %ymm12,%ymm12 +vpblendd $0xAA,%ymm\h,%ymm12,%ymm\h +.endm +/* Levels 0 to 5 of the inverse NTT on the eight ymm registers (256 bytes, 64 dword coefficients) at rdi plus 256*off; rsi is the base of the constant table indexed through _ZETAS and _ZETAS_QINV. */ +.macro levels0t5 off +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +/* level 0: register pairs (4,5) (6,7) (8,9) (10,11); each pair loads its own eight twiddles with the four 64-bit lanes reversed (vpermq 0x1B), odd dwords are moved down into ymm1/ymm2 for the even-lane multiplies */ +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,5,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 6,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-72)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,9,1,3,2,15 + +vpermq
$0x1B,(_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-104)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 10,11,1,3,2,15 + +/* level 1: pairs two registers apart; one twiddle vector is shared by two butterflies */ +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,6,1,3,2,15 +butterfly 5,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,10,1,3,2,15 +butterfly 9,11,1,3,2,15 + +/* level 2: pairs four registers apart; one twiddle vector for all four butterflies */ +vpermq $0x1B,(_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+104-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,8,1,3,2,15 +butterfly 5,9,1,3,2,15 +butterfly 6,10,1,3,2,15 +butterfly 7,11,1,3,2,15 + +/* level 3: registers are first regrouped by shuffle2 (macro from shuffle.inc, not shown here); from here on the default twiddle registers ymm1/ymm2 are used */ +shuffle2 4,5,3,5 +shuffle2 6,7,4,7 +shuffle2 8,9,6,9 +shuffle2 10,11,8,11 + +vpermq $0x1B,(_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+72-8*\off-8)*4(%rsi),%ymm2 +butterfly 3,5 +butterfly 4,7 +butterfly 6,9 +butterfly 8,11 + +/* level 4: regroup with shuffle4, then butterflies */ +shuffle4 3,4,10,4 +shuffle4 6,8,3,8 +shuffle4 5,7,6,7 +shuffle4 9,11,5,11 + +vpermq $0x1B,(_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+40-8*\off-8)*4(%rsi),%ymm2 +butterfly 10,4 +butterfly 3,8 +butterfly 6,7 +butterfly 5,11 + +/* level 5: regroup with shuffle8; a single twiddle, selected by off, is broadcast to all lanes */ +shuffle8 10,3,9,3 +shuffle8 6,5,10,5 +shuffle8 4,8,6,8 +shuffle8 7,11,4,11 + +vpbroadcastd (_ZETAS_QINV+7-\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+7-\off)*4(%rsi),%ymm2 +butterfly 9,3 +butterfly 10,5 +butterfly 6,8 +butterfly 4,11 +/* write the block back; register order follows the regrouping done by the shuffles above */ +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm10,256*\off+ 32(%rdi) +vmovdqa %ymm6,256*\off+ 64(%rdi) +vmovdqa %ymm4,256*\off+ 96(%rdi) +vmovdqa %ymm3,256*\off+128(%rdi) +vmovdqa %ymm5,256*\off+160(%rdi) +vmovdqa %ymm8,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm +/* Levels 6 and 7 of the inverse NTT on eight registers taken at a 128-byte stride starting at rdi plus 32*off, followed by a multiplication of the four lower registers by the constant at _8XDIV. */ +.macro levels6t7 off +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa
128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +/* level 6: one broadcast twiddle for the lower four registers, another for the upper four */ +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +/* level 7: a single broadcast twiddle (table entry 0) for all four butterflies */ +vpbroadcastd (_ZETAS_QINV+0)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+0)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 +/* the upper four registers are stored exactly as level 7 leaves them */ +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +/* the lower four registers are multiplied by the constant at _8XDIV using the same multiply/reduce/blend sequence as in butterfly; ymm4 and ymm5 first (ymm8 and ymm9 are free to use as scratch now) */ +vmovdqa (_8XDIV_QINV)*4(%rsi),%ymm1 +vmovdqa (_8XDIV)*4(%rsi),%ymm2 +vpmuldq %ymm1,%ymm4,%ymm12 +vpmuldq %ymm1,%ymm5,%ymm13 +vmovshdup %ymm4,%ymm8 +vmovshdup %ymm5,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm4,%ymm4 +vpmuldq %ymm2,%ymm5,%ymm5 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm4,%ymm4 +vpsubd %ymm13,%ymm5,%ymm5 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm4,%ymm4 +vmovshdup %ymm5,%ymm5 +vpblendd $0xAA,%ymm8,%ymm4,%ymm4 +vpblendd $0xAA,%ymm9,%ymm5,%ymm5 +/* same sequence for ymm6 and ymm7 */ +vpmuldq %ymm1,%ymm6,%ymm12 +vpmuldq %ymm1,%ymm7,%ymm13 +vmovshdup %ymm6,%ymm8 +vmovshdup %ymm7,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm6,%ymm6 +vpmuldq %ymm2,%ymm7,%ymm7 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm6,%ymm6 +vpsubd %ymm13,%ymm7,%ymm7 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm6,%ymm6 +vmovshdup %ymm7,%ymm7 +vpblendd
$0xAA,%ymm8,%ymm6,%ymm6 +vpblendd $0xAA,%ymm9,%ymm7,%ymm7 +/* store the four scaled lower registers */ +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +.endm +/* Entry point of the in-place inverse NTT. The code reads and writes 1024 bytes at rdi and reads constants relative to rsi; with the prototype (a, qdata) this matches rdi = a and rsi = qdata under the System V AMD64 convention (assumed, not stated in this file). */ +.text +.global cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_avx) +.global _cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_avx) +cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_avx): +_cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 +/* ymm0 stays loaded for every reduction below; levels 0-5 run on four consecutive 256-byte blocks */ +levels0t5 0 +levels0t5 1 +levels0t5 2 +levels0t5 3 +/* levels 6-7 run on four interleaved groups of registers 128 bytes apart */ +levels6t7 0 +levels6t7 1 +levels6t7 2 +levels6t7 3 + +ret diff --git a/crypto_sign/dilithium/dilithium2/avx2/ntt.S b/crypto_sign/dilithium/dilithium2/avx2/ntt.S new file mode 100644 index 00000000..5c0a80fc --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/ntt.S @@ -0,0 +1,199 @@ +#include "cdecl.h" +.include "shuffle.inc" +/* Forward-NTT butterfly on ymm registers l and h (32-bit lanes): with t the reduced product of h and a twiddle factor, l becomes l plus t and h becomes l minus t. zl0/zl1 are the multipliers callers load from the _ZETAS_QINV table (for even and odd lanes), zh0/zh1 those from _ZETAS; defaults are ymm1 and ymm2. ymm0 must hold the constant loaded from _8XQ at function entry. Clobbers ymm12, ymm13 and ymm14. */ +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpmuldq %ymm\zl0,%ymm\h,%ymm13 +vmovshdup %ymm\h,%ymm12 +vpmuldq %ymm\zl1,%ymm12,%ymm14 +/* full 64-bit products of h (even lanes, and odd lanes moved down into ymm12) with the twiddle */ +vpmuldq %ymm\zh0,%ymm\h,%ymm\h +vpmuldq %ymm\zh1,%ymm12,%ymm12 +/* multiply the low halves of the QINV products by the ymm0 constant */ +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +/* gather the high 32 bits of the twiddle products into h */ +vmovshdup %ymm\h,%ymm\h +vpblendd $0xAA,%ymm12,%ymm\h,%ymm\h +/* ymm12 = l minus high part, l = l plus high part */ +vpsubd %ymm\h,%ymm\l,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l +/* gather the high 32 bits of the correction term into ymm13 */ +vmovshdup %ymm13,%ymm13 +vpblendd $0xAA,%ymm14,%ymm13,%ymm13 +/* apply the correction with opposite signs to the two outputs */ +vpaddd %ymm13,%ymm12,%ymm\h +vpsubd %ymm13,%ymm\l,%ymm\l +.endm +/* Levels 0 and 1 of the forward NTT on eight registers taken at a 128-byte stride starting at rdi plus 32*off. */ +.macro levels0t1 off +/* level 0: a single broadcast twiddle (table entry 1) for all four butterflies */ +vpbroadcastd (_ZETAS_QINV+1)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+1)*4(%rsi),%ymm2 + +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +/* level 1: table entry 2 for the lower four registers, entry 3 for the upper four */ +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd
(_ZETAS+3)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 +/* write all eight registers back to where they were loaded from */ +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +.endm +/* Levels 2 to 7 of the forward NTT on the eight ymm registers (256 bytes, 64 dword coefficients) at rdi plus 256*off. */ +.macro levels2t7 off +/* level 2: one broadcast twiddle per block, selected by off */ +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +vpbroadcastd (_ZETAS_QINV+4+\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+4+\off)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 +/* regroup with shuffle8 (macro from shuffle.inc, not shown here) before the next level */ +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +/* level 3: a full vector of eight twiddles per block */ +vmovdqa (_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+8+8*\off)*4(%rsi),%ymm2 + +butterfly 3,5 +butterfly 8,10 +butterfly 4,6 +butterfly 9,11 +/* regroup with shuffle4 */ +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +/* level 4 */ +vmovdqa (_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+40+8*\off)*4(%rsi),%ymm2 + +butterfly 7,8 +butterfly 5,6 +butterfly 3,4 +butterfly 10,11 +/* regroup with shuffle2 */ +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +/* level 5: from here on even and odd lanes use different twiddles, so the odd dwords are moved down into ymm10 and ymm15 and passed explicitly */ +vmovdqa (_ZETAS_QINV+72+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+72+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 + +butterfly 9,5,1,10,2,15 +butterfly 8,4,1,10,2,15 +butterfly 7,3,1,10,2,15 +butterfly 6,11,1,10,2,15 + +/* level 6: two twiddle vectors, each shared by two butterflies */ +vmovdqa (_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,7,1,10,2,15 +butterfly 8,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly
5,3,1,10,2,15 +butterfly 4,11,1,10,2,15 + +/* level 7: every butterfly loads its own twiddle vector */ +vmovdqa (_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,8,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 7,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+64)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,4,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+96)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 3,11,1,10,2,15 +/* write the block back; register order follows the regrouping done by the shuffles above */ +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm8,256*\off+ 32(%rdi) +vmovdqa %ymm7,256*\off+ 64(%rdi) +vmovdqa %ymm6,256*\off+ 96(%rdi) +vmovdqa %ymm5,256*\off+128(%rdi) +vmovdqa %ymm4,256*\off+160(%rdi) +vmovdqa %ymm3,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm +/* Entry point of the in-place forward NTT. The code reads and writes 1024 bytes at rdi and reads constants relative to rsi; with the prototype (a, qdata) this matches rdi = a and rsi = qdata under the System V AMD64 convention (assumed, not stated in this file). */ +.text +.global cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_avx) +.global _cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_avx) +cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_avx): +_cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 +/* levels 0-1 on four interleaved register groups, then levels 2-7 on four consecutive 256-byte blocks */ +levels0t1 0 +levels0t1 1 +levels0t1 2 +levels0t1 3 + +levels2t7 0 +levels2t7 1 +levels2t7 2 +levels2t7 3 + +ret + diff --git a/crypto_sign/dilithium/dilithium2/avx2/ntt.h b/crypto_sign/dilithium/dilithium2/avx2/ntt.h new file mode 100644 index 00000000..fd62445f --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/ntt.h @@ -0,0 +1,14 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_NTT_H +#define PQCLEAN_DILITHIUM2_AVX2_NTT_H + +#include +/* Assembly routines. a is transformed in place; the second argument is the constant table the assembly indexes with _8XQ, _ZETAS and _ZETAS_QINV. Both are accessed with aligned 32-byte loads and stores (vmovdqa). */ +void PQCLEAN_DILITHIUM2_AVX2_ntt_avx(__m256i *a, const __m256i *PQCLEAN_DILITHIUM2_AVX2_qdata); +void PQCLEAN_DILITHIUM2_AVX2_invntt_avx(__m256i *a, const __m256i *PQCLEAN_DILITHIUM2_AVX2_qdata); + +void PQCLEAN_DILITHIUM2_AVX2_nttunpack_avx(__m256i *a); + +void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(__m256i *c, const
__m256i *a, const __m256i *b, const __m256i *PQCLEAN_DILITHIUM2_AVX2_qdata); +void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *PQCLEAN_DILITHIUM2_AVX2_qdata); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/packing.c b/crypto_sign/dilithium/dilithium2/avx2/packing.c new file mode 100644 index 00000000..d6fafe43 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/packing.c @@ -0,0 +1,261 @@ +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" + + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). Layout: SEEDBYTES bytes of rho, then K polynomials of t1, POLYT1_PACKEDBYTES bytes each. +* +* Arguments: - uint8_t pk[]: output byte array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES bytes +* - const uint8_t rho[]: byte array containing rho (SEEDBYTES bytes) +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + pk[i] = rho[i]; + } + pk += SEEDBYTES; + /* pk now points just past rho; pack the K polynomials back to back. */ + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(pk + i * POLYT1_PACKEDBYTES, &t1->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_unpack_pk +* +* Description: Unpack public key pk = (rho, t1).
+* +* Arguments: - uint8_t rho[]: output byte array for rho (SEEDBYTES bytes) +* - polyveck *t1: pointer to output vector t1 +* - const uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = pk[i]; + } + pk += SEEDBYTES; + /* pk now points just past rho; the K packed polynomials follow back to back. */ + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(&t1->vec[i], pk + i * POLYT1_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_pack_sk +* +* Description: Bit-pack secret key. The fields are written in the order rho, key, tr, s1, s2, t0 (note that this differs from the argument order). +* +* Arguments: - uint8_t sk[]: output byte array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES bytes +* - const uint8_t rho[]: byte array containing rho (SEEDBYTES bytes) +* - const uint8_t tr[]: byte array containing tr (CRHBYTES bytes) +* - const uint8_t key[]: byte array containing key (SEEDBYTES bytes) +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) { + unsigned int i; + /* sk is advanced past each field once it has been written. */ + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = rho[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = key[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + sk[i] = tr[i]; + } + sk += CRHBYTES; + /* s1: L polynomials of POLYETA_PACKEDBYTES each. */ + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s1->vec[i]); + } + sk += L * POLYETA_PACKEDBYTES; + /* s2: K polynomials of POLYETA_PACKEDBYTES each. */ + for (i = 0; i < K; ++i) { +
PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(sk + i * POLYT0_PACKEDBYTES, &t0->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const uint8_t tr[]: output byte array for tr +* - const uint8_t key[]: output byte array for key +* - const polyveck *t0: pointer to output vector t0 +* - const polyvecl *s1: pointer to output vector s1 +* - const polyveck *s2: pointer to output vector s2 +* - uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + key[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + tr[i] = sk[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(&s1->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(&s2->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack(&t0->vec[i], sk + i * POLYT0_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_pack_sig +* +* Description: Bit-pack signature sig = (c, z, h). 
+* +* Arguments: - uint8_t sig[]: output byte array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES bytes +* - const uint8_t *c: pointer to challenge hash of length SEEDBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES], + const uint8_t c[SEEDBYTES], + const polyvecl *z, + const polyveck *h) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + sig[i] = c[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Encode h: OMEGA index bytes followed by K running-count bytes, all cleared first */ + for (i = 0; i < OMEGA + K; ++i) { + sig[i] = 0; + } + /* For each polynomial append the positions of its nonzero coefficients in increasing order, then record the running total in sig[OMEGA + i]. NOTE(review): nothing here checks that the total stays within OMEGA; presumably the caller guarantees it. */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + if (h->vec[i].coeffs[j] != 0) { + sig[k++] = (uint8_t) j; + } + } + + sig[OMEGA + i] = (uint8_t) k; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output challenge hash (SEEDBYTES bytes) +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature (a running hint count that decreases or exceeds OMEGA, hint indices within one polynomial that are not strictly increasing, or nonzero unused index bytes); otherwise 0. c, z and part of h may already have been written when 1 is returned.
+**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_unpack_sig(uint8_t c[SEEDBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES]) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + c[i] = sig[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h: sig[0..OMEGA-1] are coefficient indices, sig[OMEGA + i] is the running count after polynomial i; k is the count before polynomial i */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + h->vec[i].coeffs[j] = 0; + } + /* The running count must not decrease and must not exceed OMEGA, which also keeps the index reads below within the OMEGA index bytes. */ + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { + return 1; + } + + for (j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) { + return 1; + } + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (j = k; j < OMEGA; ++j) { + if (sig[j]) { + return 1; + } + } + + return 0; +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/packing.h b/crypto_sign/dilithium/dilithium2/avx2/packing.h new file mode 100644 index 00000000..12400f09 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/packing.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_PACKING_H +#define PQCLEAN_DILITHIUM2_AVX2_PACKING_H +#include "params.h" +#include "polyvec.h" +#include +/* Serialization of keys and signatures; the pack functions write to their first argument. */ +void PQCLEAN_DILITHIUM2_AVX2_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +void PQCLEAN_DILITHIUM2_AVX2_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +void PQCLEAN_DILITHIUM2_AVX2_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); + +void
PQCLEAN_DILITHIUM2_AVX2_unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES]); + +void PQCLEAN_DILITHIUM2_AVX2_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES]); + +int PQCLEAN_DILITHIUM2_AVX2_unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES]); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/params.h b/crypto_sign/dilithium/dilithium2/avx2/params.h new file mode 100644 index 00000000..6556cc70 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/params.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_PARAMS_H +#define PQCLEAN_DILITHIUM2_AVX2_PARAMS_H + + + +#define SEEDBYTES 32 +#define CRHBYTES 48 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_ALGNAME "Dilithium2" + + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#define POLYZ_PACKEDBYTES 576 + +#define POLYW1_PACKEDBYTES 192 + +#define POLYETA_PACKEDBYTES 96 + +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + CRHBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES (SEEDBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/pointwise.S b/crypto_sign/dilithium/dilithium2/avx2/pointwise.S new file mode 100644 index 00000000..1c8c8122 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/pointwise.S @@ 
-0,0 +1,199 @@ +#include "params.h" +#include "cdecl.h" + +.text +.global cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx) +.global _cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx) +cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx): +_cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop1: +#load +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa 64(%rsi),%ymm6 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vmovdqa 64(%rdx),%ymm14 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm6,%ymm7 +vpsrlq $32,%ymm10,%ymm11 +vpsrlq $32,%ymm12,%ymm13 +vmovshdup %ymm14,%ymm15 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 +vpmuldq %ymm6,%ymm14,%ymm6 +vpmuldq %ymm7,%ymm15,%ymm7 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm0,%ymm6,%ymm14 +vpmuldq %ymm0,%ymm7,%ymm15 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpmuldq %ymm1,%ymm14,%ymm14 +vpmuldq %ymm1,%ymm15,%ymm15 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsubq %ymm14,%ymm6,%ymm6 +vpsubq %ymm15,%ymm7,%ymm7 +vpsrlq $32,%ymm2,%ymm2 +vpsrlq $32,%ymm4,%ymm4 +vmovshdup %ymm6,%ymm6 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 +vpblendd $0xAA,%ymm7,%ymm6,%ymm6 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm6,64(%rdi) + +add $96,%rdi +add $96,%rsi +add $96,%rdx +add $1,%eax +cmp $10,%eax +jb _looptop1 + +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq 
%ymm5,%ymm13,%ymm5 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0x55,%ymm2,%ymm3,%ymm2 +vpblendd $0x55,%ymm4,%ymm5,%ymm4 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +ret + +.macro pointwise off +#load +vmovdqa \off(%rsi),%ymm6 +vmovdqa \off+32(%rsi),%ymm8 +vmovdqa \off(%rdx),%ymm10 +vmovdqa \off+32(%rdx),%ymm12 +vpsrlq $32,%ymm6,%ymm7 +vpsrlq $32,%ymm8,%ymm9 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm6,%ymm10,%ymm6 +vpmuldq %ymm7,%ymm11,%ymm7 +vpmuldq %ymm8,%ymm12,%ymm8 +vpmuldq %ymm9,%ymm13,%ymm9 +.endm + +.macro acc +vpaddq %ymm6,%ymm2,%ymm2 +vpaddq %ymm7,%ymm3,%ymm3 +vpaddq %ymm8,%ymm4,%ymm4 +vpaddq %ymm9,%ymm5,%ymm5 +.endm + +.global cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx) +.global _cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx) +cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx): +_cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop2: +pointwise 0 + +#mov +vmovdqa %ymm6,%ymm2 +vmovdqa %ymm7,%ymm3 +vmovdqa %ymm8,%ymm4 +vmovdqa %ymm9,%ymm5 + +pointwise 1024 +acc + +pointwise 2048 +acc + +pointwise 3072 +acc + + + + +#reduce +vpmuldq %ymm0,%ymm2,%ymm6 +vpmuldq %ymm0,%ymm3,%ymm7 +vpmuldq %ymm0,%ymm4,%ymm8 +vpmuldq %ymm0,%ymm5,%ymm9 +vpmuldq %ymm1,%ymm6,%ymm6 +vpmuldq %ymm1,%ymm7,%ymm7 +vpmuldq %ymm1,%ymm8,%ymm8 +vpmuldq %ymm1,%ymm9,%ymm9 +vpsubq %ymm6,%ymm2,%ymm2 +vpsubq %ymm7,%ymm3,%ymm3 +vpsubq %ymm8,%ymm4,%ymm4 +vpsubq %ymm9,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 + 
+vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +add $64,%rsi +add $64,%rdx +add $64,%rdi +add $1,%eax +cmp $16,%eax +jb _looptop2 + +ret diff --git a/crypto_sign/dilithium/dilithium2/avx2/poly.c b/crypto_sign/dilithium/dilithium2/avx2/poly.c new file mode 100644 index 00000000..113e5fca --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/poly.c @@ -0,0 +1,1027 @@ +#include "align.h" +#include "consts.h" +#include "fips202x4.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "rejsample.h" +#include "rounding.h" +#include "symmetric.h" +#include +#include +#include + +#define DBENCH_START() +#define DBENCH_STOP(t) + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_reduce(poly *a) { + unsigned int i; + __m256i f, g; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM2_AVX2_qdata.vec[_8XQ / 8]); + const __m256i off = _mm256_set1_epi32(1 << 22); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_add_epi32(f, off); + g = _mm256_srai_epi32(g, 23); + g = _mm256_mullo_epi32(g, q); + f = _mm256_sub_epi32(f, g); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_caddq(poly *a) { + unsigned int i; + __m256i f, g; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM2_AVX2_qdata.vec[_8XQ / 8]); + const __m256i zero = _mm256_setzero_si256(); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_blendv_epi32(zero, q, f); + f = _mm256_add_epi32(f, g); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_freeze +* +* Description: Inplace reduction of all coefficients of polynomial to +* positive standard representatives. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in +* absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_freeze(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(a); + PQCLEAN_DILITHIUM2_AVX2_poly_caddq(a); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f, g; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_add_epi32(f, g); + _mm256_store_si256(&c->vec[i], f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_sub +* +* Description: Subtract polynomials.
No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f, g; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_sub_epi32(f, g); + _mm256_store_si256(&c->vec[i], f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(poly *a) { + unsigned int i; + __m256i f; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_slli_epi32(f, D); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by up to +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_ntt(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_ntt_avx(a->vec, PQCLEAN_DILITHIUM2_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}.
+* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_invntt_avx(a->vec, PQCLEAN_DILITHIUM2_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_nttunpack_avx(a->vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(c->vec, a->vec, b->vec, PQCLEAN_DILITHIUM2_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod^+ Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* positive standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_power2round_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c1, c0 such that c mod^+ Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except if c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_decompose_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_make_hint +* +* Description: Compute hint array, the entries of which are the +* indices of the coefficients of the input polynomial +* whose low bits overflow into the high bits. +* +* Arguments: - uint8_t hint[N]: pointer to output hint array (preallocated of length N) +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of hints, i.e. length of hint array.
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM2_AVX2_poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1) { + unsigned int r; + DBENCH_START(); + + r = PQCLEAN_DILITHIUM2_AVX2_make_hint_avx(hint, a0->vec, a1->vec); + + DBENCH_STOP(*tround); + return r; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_use_hint(poly *b, const poly *a, const poly *h) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_AVX2_use_hint_avx(b->vec, a->vec, h->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input polynomial to be reduced by PQCLEAN_DILITHIUM2_AVX2_poly_reduce(). +* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. 
+**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int r; + __m256i f, t; + const __m256i bound = _mm256_set1_epi32(B - 1); + DBENCH_START(); + + if (B > (Q - 1) / 8) { + return 1; + } + + t = _mm256_setzero_si256(); + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_abs_epi32(f); + f = _mm256_cmpgt_epi32(f, bound); + t = _mm256_or_si256(t, f); + } + + r = 1 - _mm256_testz_si256(t, t); + DBENCH_STOP(*tsample); + return r; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + a[ctr++] = t; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream of SHAKE256(seed|nonce) or AES256CTR(seed,nonce). 
+* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_preinit(poly *a, stream128_state *state) { + unsigned int ctr; + /* PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx reads up to 8 additional bytes */ + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN + 8) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_NBLOCKS, state); + ctr = PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(a->coeffs, buf.coeffs); + + while (ctr < N) { + /* length of buf is always divisible by 3; hence, no bytes left */ + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + stream128_state state; + stream128_init(&state, seed, nonce); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_preinit(a, &state); + stream128_release(&state); +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN + 8) buf[4]; + keccakx4_state state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[SEEDBYTES + 0] = nonce0; + buf[0].coeffs[SEEDBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES + 0] = nonce1; + buf[1].coeffs[SEEDBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES + 0] = nonce2; + buf[2].coeffs[SEEDBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES + 0] = nonce3; + buf[3].coeffs[SEEDBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM2_AVX2_shake128x4_absorb_once(&state, 
buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state); + + ctr0 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(a0->coeffs, buf[0].coeffs); + ctr1 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(a1->coeffs, buf[1].coeffs); + ctr2 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(a2->coeffs, buf[2].coeffs); + ctr3 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(a3->coeffs, buf[3].coeffs); + + while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. 
+**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos < buflen) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if (t0 < 15) { + t0 = t0 - (205 * t0 >> 10) * 5; + a[ctr++] = 2 - t0; + } + if (t1 < 15 && ctr < len) { + t1 = t1 - (205 * t1 >> 10) * 5; + a[ctr++] = 2 - t1; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling using the +* output stream of SHAKE256(seed|nonce) +* or AES256CTR(seed,nonce). +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_preinit(poly *a, stream128_state *state) { + unsigned int ctr; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN * STREAM128_BLOCKBYTES) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_ETA_NBLOCKS, state); + ctr = PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(a->coeffs, buf.coeffs); + + while (ctr < N) { + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + stream128_state state; + stream128_init(&state, seed, nonce); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_preinit(a, &state); + stream128_release(&state); +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { 
+ unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4]; + + __m256i f; + keccakx4_state state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[SEEDBYTES + 0] = nonce0; + buf[0].coeffs[SEEDBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES + 0] = nonce1; + buf[1].coeffs[SEEDBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES + 0] = nonce2; + buf[2].coeffs[SEEDBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES + 0] = nonce3; + buf[3].coeffs[SEEDBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM2_AVX2_shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state); + + ctr0 = PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(a0->coeffs, buf[0].coeffs); + ctr1 = PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(a1->coeffs, buf[1].coeffs); + ctr2 = PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(a2->coeffs, buf[2].coeffs); + ctr3 = PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(a3->coeffs, buf[3].coeffs); + + while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + PQCLEAN_DILITHIUM2_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_eta(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_eta(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of 
SHAKE256(seed|nonce) or AES256CTR(seed,nonce). +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1_preinit(poly *a, stream256_state *state) { + /* PQCLEAN_DILITHIUM2_AVX2_polyz_unpack reads 14 additional bytes */ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14) buf; + stream256_squeezeblocks(buf.coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, state); + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(a, buf.coeffs); +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) { + stream256_state state; + stream256_init(&state, seed, nonce); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1_preinit(a, &state); + stream256_release(&state); +} + +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[48], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14) buf[4]; + keccakx4_state state; + __m256i f; + __m128i g; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + g = _mm_loadu_si128((__m128i *)&seed[32]); + _mm_store_si128((__m128i *)&buf[0].vec[1], g); + _mm_store_si128((__m128i *)&buf[1].vec[1], g); + _mm_store_si128((__m128i *)&buf[2].vec[1], g); + _mm_store_si128((__m128i *)&buf[3].vec[1], g); + + buf[0].coeffs[CRHBYTES + 0] = nonce0; + buf[0].coeffs[CRHBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[CRHBYTES + 0] = nonce1; + buf[1].coeffs[CRHBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[CRHBYTES + 0] = nonce2; + 
buf[2].coeffs[CRHBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[CRHBYTES + 0] = nonce3; + buf[3].coeffs[CRHBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM2_AVX2_shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, CRHBYTES + 2); + PQCLEAN_DILITHIUM2_AVX2_shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(a0, buf[0].coeffs); + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(a1, buf[1].coeffs); + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(a2, buf[2].coeffs); + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(a3, buf[3].coeffs); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_poly_challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed). +* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t seed[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_poly_challenge(poly *restrict c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + ALIGNED_UINT8(SHAKE256_RATE) buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + + memcpy(&signs, buf.coeffs, 8); + pos = 8; + + memset(c->vec, 0, sizeof(poly)); + for (i = N - TAU; i < N; ++i) { + do { + if (pos >= SHAKE256_RATE) { + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + pos = 0; + } + + b = buf.coeffs[pos++]; + } while (b > i); + + c->coeffs[i] = c->coeffs[b]; + c->coeffs[b] = 1 - 2 * (signs & 1); + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyeta_pack +* +* Description: Bit-pack polynomial with
coefficients in [-ETA,ETA]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = ETA - a->coeffs[8 * i + 0]; + t[1] = ETA - a->coeffs[8 * i + 1]; + t[2] = ETA - a->coeffs[8 * i + 2]; + t[3] = ETA - a->coeffs[8 * i + 3]; + t[4] = ETA - a->coeffs[8 * i + 4]; + t[5] = ETA - a->coeffs[8 * i + 5]; + t[6] = ETA - a->coeffs[8 * i + 6]; + t[7] = ETA - a->coeffs[8 * i + 7]; + + r[3 * i + 0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); + r[3 * i + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); + r[3 * i + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(poly *restrict r, const uint8_t a[POLYETA_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + r->coeffs[8 * i + 0] = (a[3 * i + 0] >> 0) & 7; + r->coeffs[8 * i + 1] = (a[3 * i + 0] >> 3) & 7; + r->coeffs[8 * i + 2] = ((a[3 * i + 0] >> 6) | (a[3 * i + 1] << 2)) & 7; + r->coeffs[8 * i + 3] = (a[3 * i + 1] >> 1) & 7; + r->coeffs[8 * i + 4] = (a[3 * i + 1] >> 4) & 7; + r->coeffs[8 * i + 5] = ((a[3 * i + 1] >> 7) | (a[3 * i + 2] << 1)) & 7; + r->coeffs[8 * i + 6] = (a[3 * i + 2] >> 2) & 7; + r->coeffs[8 * i + 7] = (a[3 * i + 2] >> 5) & 7; + + r->coeffs[8 * i + 0] = ETA - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = ETA - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = ETA - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = ETA - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = ETA - r->coeffs[8 * i + 4]; + r->coeffs[8 * i + 5] = ETA - r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = ETA - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = ETA - r->coeffs[8 * i + 7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r[5 * i + 0] = (a->coeffs[4 * i + 0] >> 0); + r[5 * i + 1] = (a->coeffs[4 * i + 0] >> 8) | (a->coeffs[4 * i + 1] << 2); + r[5 * i + 2] = (a->coeffs[4 * i + 1] >> 6) | (a->coeffs[4 * i + 2] << 4); + r[5 * i + 3] = (a->coeffs[4 * i + 2] >> 4) | (a->coeffs[4 * i + 3] << 6); + r[5 * i + 4] = (a->coeffs[4 * i + 3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are positive standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(poly *restrict r, const uint8_t a[POLYT1_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r->coeffs[4 * i + 0] = ((a[5 * i + 0] >> 0) | ((uint32_t)a[5 * i + 1] << 8)) & 0x3FF; + r->coeffs[4 * i + 1] = ((a[5 * i + 1] >> 2) | ((uint32_t)a[5 * i + 2] << 6)) & 0x3FF; + r->coeffs[4 * i + 2] = ((a[5 * i + 2] >> 4) | ((uint32_t)a[5 * i + 3] << 4)) & 0x3FF; + r->coeffs[4 * i + 3] = ((a[5 * i + 3] >> 6) | ((uint32_t)a[5 * i + 4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = (1 << (D - 1)) - a->coeffs[8 * i + 0]; + t[1] = (1 << (D - 1)) - a->coeffs[8 * i + 1]; + t[2] = (1 << (D - 1)) - a->coeffs[8 * i + 2]; + t[3] = (1 << (D - 1)) - a->coeffs[8 * i + 3]; + t[4] = (1 << (D - 1)) - a->coeffs[8 * i + 4]; + t[5] = (1 << (D - 1)) - a->coeffs[8 * i + 5]; + t[6] = (1 << (D - 1)) - a->coeffs[8 * i + 6]; + t[7] = (1 << (D - 1)) - a->coeffs[8 * i + 7]; + + r[13 * i + 0] = t[0]; + r[13 * i + 1] = t[0] >> 8; + r[13 * i + 1] |= t[1] << 5; + r[13 * i + 2] = t[1] >> 3; + r[13 * i + 3] = t[1] >> 11; + r[13 * i + 3] |= t[2] << 2; + r[13 * i + 4] = t[2] >> 6; + r[13 * i + 4] |= t[3] << 7; + r[13 * i + 5] = t[3] >> 1; + r[13 * i + 6] = t[3] >> 9; + r[13 * i + 6] |= t[4] << 4; + r[13 * i + 7] = t[4] >> 4; + r[13 * i + 8] = t[4] >> 12; + r[13 * i + 8] |= t[5] << 1; + r[13 * i + 9] = t[5] >> 7; + r[13 * i + 9] |= t[6] << 6; + r[13 * i + 10] = t[6] >> 2; + r[13 * i + 11] = t[6] >> 10; + r[13 * i + 11] |= t[7] << 3; + r[13 * i + 12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack(poly *restrict r, const uint8_t a[POLYT0_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + r->coeffs[8 * i + 0] = a[13 * i + 0]; + r->coeffs[8 * i + 0] |= (uint32_t)a[13 * i + 1] << 8; + r->coeffs[8 * i + 0] &= 0x1FFF; + + r->coeffs[8 * i + 1] = a[13 * i + 1] >> 5; + r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 2] << 3; + r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 3] << 11; + r->coeffs[8 * i + 1] &= 0x1FFF; + + r->coeffs[8 * i + 2] = a[13 * i + 3] >> 2; + r->coeffs[8 * i + 2] |= (uint32_t)a[13 * i + 4] << 6; + r->coeffs[8 * i + 2] &= 0x1FFF; + + r->coeffs[8 * i + 3] = a[13 * i + 4] >> 7; + r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 5] << 1; + r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 6] << 9; + r->coeffs[8 * i + 3] &= 0x1FFF; + + r->coeffs[8 * i + 4] = a[13 * i + 6] >> 4; + r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 7] << 4; + r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 8] << 12; + r->coeffs[8 * i + 4] &= 0x1FFF; + + r->coeffs[8 * i + 5] = a[13 * i + 8] >> 1; + r->coeffs[8 * i + 5] |= (uint32_t)a[13 * i + 9] << 7; + r->coeffs[8 * i + 5] &= 0x1FFF; + + r->coeffs[8 * i + 6] = a[13 * i + 9] >> 6; + r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 10] << 2; + r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 11] << 10; + r->coeffs[8 * i + 6] &= 0x1FFF; + + r->coeffs[8 * i + 7] = a[13 * i + 11] >> 3; + r->coeffs[8 * i + 7] |= (uint32_t)a[13 * i + 12] << 5; + r->coeffs[8 * i + 7] &= 0x1FFF; + + r->coeffs[8 * i + 0] = (1 << (D - 1)) - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = (1 << (D - 1)) - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = (1 << (D - 1)) - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = (1 << (D - 1)) - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = (1 << (D - 1)) - r->coeffs[8 * i + 4]; + r->coeffs[8 * 
i + 5] = (1 << (D - 1)) - r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = (1 << (D - 1)) - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = (1 << (D - 1)) - r->coeffs[8 * i + 7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyz_pack +* +* Description: Bit-pack polynomial with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + t[0] = GAMMA1 - a->coeffs[4 * i + 0]; + t[1] = GAMMA1 - a->coeffs[4 * i + 1]; + t[2] = GAMMA1 - a->coeffs[4 * i + 2]; + t[3] = GAMMA1 - a->coeffs[4 * i + 3]; + + r[9 * i + 0] = t[0]; + r[9 * i + 1] = t[0] >> 8; + r[9 * i + 2] = t[0] >> 16; + r[9 * i + 2] |= t[1] << 2; + r[9 * i + 3] = t[1] >> 6; + r[9 * i + 4] = t[1] >> 14; + r[9 * i + 4] |= t[2] << 4; + r[9 * i + 5] = t[2] >> 4; + r[9 * i + 6] = t[2] >> 12; + r[9 * i + 6] |= t[3] << 6; + r[9 * i + 7] = t[3] >> 2; + r[9 * i + 8] = t[3] >> 10; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(poly *restrict r, const uint8_t a[POLYZ_PACKEDBYTES + 14]) { + unsigned int i; + __m256i f; + const __m256i shufbidx = _mm256_set_epi8(-1, 9, 8, 7, -1, 7, 6, 5, -1, 5, 4, 3, -1, 3, 2, 1, + -1, 8, 7, 6, -1, 6, 5, 4, -1, 4, 3, 2, -1, 2, 1, 0); + const __m256i srlvdidx = _mm256_set_epi32(6, 4, 2, 0, 6, 4, 2, 0); + const __m256i mask = _mm256_set1_epi32(0x3FFFF); + const __m256i gamma1 = _mm256_set1_epi32(GAMMA1); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_loadu_si256((__m256i *)&a[18 * i]); + f = _mm256_permute4x64_epi64(f, 0x94); + f = _mm256_shuffle_epi8(f, shufbidx); + f = _mm256_srlv_epi32(f, srlvdidx); + f = _mm256_and_si256(f, mask); + f = _mm256_sub_epi32(gamma1, f); + _mm256_store_si256(&r->vec[i], f); + } + + DBENCH_STOP(*tpack); +} + + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyw1_pack +* +* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43]. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(uint8_t r[POLYW1_PACKEDBYTES + 8], const poly *restrict a) { + unsigned int i; + __m256i f0, f1, f2, f3; + const __m256i shift1 = _mm256_set1_epi16((64 << 8) + 1); + const __m256i shift2 = _mm256_set1_epi32((4096 << 16) + 1); + const __m256i shufdidx1 = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const __m256i shufdidx2 = _mm256_set_epi32(-1, -1, 6, 5, 4, 2, 1, 0); + const __m256i shufbidx = _mm256_set_epi8(-1, -1, -1, -1, 14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0, + -1, -1, -1, -1, 14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0); + DBENCH_START(); + /* Each pass consumes 32 coefficients (four vectors) and stores one full 32-byte vector at r[24 * i]; the output advances by only 24 bytes per pass, so the last store extends 8 bytes past the final 24-byte group, which is why the array bound is POLYW1_PACKEDBYTES + 8. */ + for (i = 0; i < N / 32; i++) { + f0 = _mm256_load_si256(&a->vec[4 * i + 0]); + f1 = _mm256_load_si256(&a->vec[4 * i + 1]); + f2 = _mm256_load_si256(&a->vec[4 * i + 2]); + f3 = _mm256_load_si256(&a->vec[4 * i + 3]); /* Narrow the 32-bit coefficients to bytes with two saturating pack steps. */ + f0 = _mm256_packus_epi32(f0, f1); + f1 = _mm256_packus_epi32(f2, f3); + f0 = _mm256_packus_epi16(f0, f1); /* Byte weights (1, 64) and then word weights (1, 4096) join four coefficient bytes at 6 bits each into the low 24 bits of every dword. */ + f0 = _mm256_maddubs_epi16(f0, shift1); + f0 = _mm256_madd_epi16(f0, shift2); /* Restore coefficient order across lanes, drop the unused top byte of each dword, then compact the six payload dwords to the front. */ + f0 = _mm256_permutevar8x32_epi32(f0, shufdidx1); + f0 = _mm256_shuffle_epi8(f0, shufbidx); + f0 = _mm256_permutevar8x32_epi32(f0, shufdidx2); + _mm256_storeu_si256((__m256i *)&r[24 * i], f0); + } + + DBENCH_STOP(*tpack); +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/poly.h b/crypto_sign/dilithium/dilithium2/avx2/poly.h new file mode 100644 index 00000000..8310e4a1 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/poly.h @@ -0,0 +1,79 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_POLY_H +#define PQCLEAN_DILITHIUM2_AVX2_POLY_H +#include "align.h" +#include "params.h" +#include "symmetric.h" +#include + /* poly is the ALIGNED_INT32(N) union from align.h: the same storage is reachable as int32_t coeffs[] or as __m256i vec[]. */ +typedef ALIGNED_INT32(N) poly; + +void PQCLEAN_DILITHIUM2_AVX2_poly_reduce(poly *a); +void PQCLEAN_DILITHIUM2_AVX2_poly_caddq(poly *a); +void PQCLEAN_DILITHIUM2_AVX2_poly_freeze(poly *a); 
+ /* NOTE(review): the functions declared from here up to the 4x samplers are defined outside the visible part of this patch; only their signatures can be checked here. */ +void PQCLEAN_DILITHIUM2_AVX2_poly_add(poly *c, const poly *a, const poly *b); +void PQCLEAN_DILITHIUM2_AVX2_poly_sub(poly *c, const poly *a, const poly *b); +void PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(poly *a); + +void PQCLEAN_DILITHIUM2_AVX2_poly_ntt(poly *a); +void PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(poly *a); +void PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(poly *a); +void PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +void PQCLEAN_DILITHIUM2_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a); +void PQCLEAN_DILITHIUM2_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a); /* polyveck_make_hint in polyvec.c adds the value returned by poly_make_hint to its running offset into the hint array. */ +unsigned int PQCLEAN_DILITHIUM2_AVX2_poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1); +void PQCLEAN_DILITHIUM2_AVX2_poly_use_hint(poly *b, const poly *a, const poly *h); + /* The polyvec chknorm wrappers return 1 as soon as poly_chknorm returns nonzero for any component. */ +int PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(const poly *a, int32_t B); +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_preinit(poly *a, stream128_state *state); +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_preinit(poly *a, stream128_state *state); +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); /* The gamma1 sampler is the one single-polynomial sampler declared with a stream256_state and a CRHBYTES-sized seed instead of stream128_state / SEEDBYTES. */ +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1_preinit(poly *a, stream256_state *state); +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +void PQCLEAN_DILITHIUM2_AVX2_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + /* 4x variants: four output polynomials and four nonces share one seed argument. */ +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +void 
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); + /* Bit-packing routines. The array bounds state the minimum buffer size each routine expects. */ +void PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(poly *r, const uint8_t a[POLYETA_PACKEDBYTES]); + +void PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(poly *r, const uint8_t a[POLYT1_PACKEDBYTES]); + +void PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack(poly *r, const uint8_t a[POLYT0_PACKEDBYTES]); + /* The + 14 and + 8 slack below matches the definitions in poly.c: polyz_unpack issues 32-byte loads at an 18-byte stride and polyw1_pack issues 32-byte stores at a 24-byte stride, so the last access runs past the packed data. */ +void PQCLEAN_DILITHIUM2_AVX2_polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(poly *r, const uint8_t a[POLYZ_PACKEDBYTES + 14]); + +void PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(uint8_t r[POLYW1_PACKEDBYTES + 8], const poly *a); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/polyvec.c b/crypto_sign/dilithium/dilithium2/avx2/polyvec.c new file mode 100644 index 00000000..9b989246 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/polyvec.c @@ -0,0 +1,474 @@ +#include "consts.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include + /* UNUSED marks the rowb parameter of the expand_rowN functions, which those functions never read. */ +#define UNUSED(x) (void)x + /* NOTE(review): the banner below is titled expand_mat, but the function it precedes is PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand. */ +/************************************************* +* Name: expand_mat +* +* Description: Implementation of ExpandA. Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* or AES256CTR(rho,j|i). 
+* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row0(&mat[0], NULL, rho); + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row1(&mat[1], NULL, rho); + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row2(&mat[2], NULL, rho); + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row3(&mat[3], NULL, rho); +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + UNUSED(rowb); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[0]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[3]); +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + UNUSED(rowb); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 256, 257, 258, 259); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[0]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[3]); +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + UNUSED(rowb); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 512, 513, 514, 515); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[0]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[2]); + 
PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[3]); +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + UNUSED(rowb); + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 768, 769, 770, 771); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[0]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM2_AVX2_poly_nttunpack(&rowa->vec[3]); +} + + +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); + } +} + +/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta(&v->vec[i], seed, nonce++); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1(&v->vec[i], seed, L * nonce + i); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze +* +* Description: Reduce coefficients of polynomials in vector of length L +* to standard representatives. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_freeze(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&v->vec[i]); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&v->vec[i]); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. +* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u, const polyvecl *v) { + PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(w->vec, u->vec->vec, v->vec->vec, PQCLEAN_DILITHIUM2_AVX2_qdata.vec); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by PQCLEAN_DILITHIUM2_AVX2_polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for (i = 0; i < L; ++i) { + if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&v->vec[i], bound)) { + return 1; + } + } + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta(&v->vec[i], seed, nonce++); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_caddq(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_caddq(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze +* +* Description: Reduce coefficients of polynomials in vector of length K +* to standard representatives. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_freeze(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl +* +* Description: Multiply vector of polynomials of Length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&v->vec[i]); + } +} + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce(). 
+* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials are strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for (i = 0; i < K; ++i) { + if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&v->vec[i], bound)) { + return 1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. 
+* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint +* +* Description: Compute hint vector. +* +* Arguments: - uint8_t *hint: pointer to output hint array +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. +**************************************************/ +unsigned int PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1) { + unsigned int i, n = 0; + + for (i = 0; i < K; ++i) { + n += PQCLEAN_DILITHIUM2_AVX2_poly_make_hint(&hint[n], &v0->vec[i], &v1->vec[i]); + } + + return n; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. 
+* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + /* Apply poly_use_hint to each of the K component polynomials. */ + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); + } +} + /* Pack the K polynomials of w1 back to back, POLYW1_PACKEDBYTES apart. */ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + /* NOTE(review): polyw1_pack declares its output as POLYW1_PACKEDBYTES + 8 bytes because its last vector store overruns the payload, so the call for i = K - 1 presumably needs 8 bytes of slack beyond the K * POLYW1_PACKEDBYTES declared here -- confirm at the call sites. */ + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(&r[i * POLYW1_PACKEDBYTES], &w1->vec[i]); + } +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/polyvec.h b/crypto_sign/dilithium/dilithium2/avx2/polyvec.h new file mode 100644 index 00000000..ee8b05ff --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/polyvec.h @@ -0,0 +1,72 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_POLYVEC_H +#define PQCLEAN_DILITHIUM2_AVX2_POLYVEC_H +#include "params.h" +#include "poly.h" +#include + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + /* NOTE(review): the definition forwards seed to poly_uniform_gamma1, whose prototype declares seed[CRHBYTES]; the SEEDBYTES bound here looks inconsistent -- confirm the intended seed length. */ +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_reduce(polyvecl *v); + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze(polyvecl *v); + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(polyvecl *v); +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_invntt_tomont(polyvecl *v); +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +void PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl 
*v); + /* The norm bound is named B in these prototypes and bound in the polyvec.c definitions; both chknorm wrappers return 1 as soon as poly_chknorm reports a violation for any component, else 0. */ +int PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm(const polyvecl *v, int32_t B); + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce(polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_caddq(polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze(polyveck *v); + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl(polyveck *v); + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_tomont(polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +int PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm(const polyveck *v, int32_t B); + +void PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); /* make_hint returns the sum of the per-polynomial poly_make_hint return values. */ +unsigned int PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1); +void PQCLEAN_DILITHIUM2_AVX2_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); + /* NOTE(review): the definition passes &r[i * POLYW1_PACKEDBYTES] to polyw1_pack, which is declared with POLYW1_PACKEDBYTES + 8 output bytes; r presumably needs 8 spare bytes at the end -- confirm at the call sites. */ +void PQCLEAN_DILITHIUM2_AVX2_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1); + /* matrix_expand calls expand_row0..expand_row3 with rowb = NULL. */ +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + /* In the row0..row3 definitions in polyvec.c, rowb is marked UNUSED and only rowa is written. */ +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const 
uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); + +void PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/rejsample.c b/crypto_sign/dilithium/dilithium2/avx2/rejsample.c new file mode 100644 index 00000000..1c8352e6 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/rejsample.c @@ -0,0 +1,408 @@ +#include "params.h" +#include "rejsample.h" +#include "symmetric.h" +#include +#include + +const uint8_t PQCLEAN_DILITHIUM2_AVX2_idxlut[256][8] = { + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 0, 0, 0, 0, 0, 0}, + { 2, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0}, + { 1, 2, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 0, 0, 0, 0, 0}, + { 3, 0, 0, 0, 0, 0, 0, 0}, + { 0, 3, 0, 0, 0, 0, 0, 0}, + { 1, 3, 0, 0, 0, 0, 0, 0}, + { 0, 1, 3, 0, 0, 0, 0, 0}, + { 2, 3, 0, 0, 0, 0, 0, 0}, + { 0, 2, 3, 0, 0, 0, 0, 0}, + { 1, 2, 3, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 0, 0, 0, 0}, + { 4, 0, 0, 0, 0, 0, 0, 0}, + { 0, 4, 0, 0, 0, 0, 0, 0}, + { 1, 4, 0, 0, 0, 0, 0, 0}, + { 0, 1, 4, 0, 0, 0, 0, 0}, + { 2, 4, 0, 0, 0, 0, 0, 0}, + { 0, 2, 4, 0, 0, 0, 0, 0}, + { 1, 2, 4, 0, 0, 0, 0, 0}, + { 0, 1, 2, 4, 0, 0, 0, 0}, + { 3, 4, 0, 0, 0, 0, 0, 0}, + { 0, 3, 4, 0, 0, 0, 0, 0}, + { 1, 3, 4, 0, 0, 0, 0, 0}, + { 0, 1, 3, 4, 0, 0, 0, 0}, + { 2, 3, 4, 0, 0, 0, 0, 0}, + 
{ 0, 2, 3, 4, 0, 0, 0, 0}, + { 1, 2, 3, 4, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 0, 0, 0}, + { 5, 0, 0, 0, 0, 0, 0, 0}, + { 0, 5, 0, 0, 0, 0, 0, 0}, + { 1, 5, 0, 0, 0, 0, 0, 0}, + { 0, 1, 5, 0, 0, 0, 0, 0}, + { 2, 5, 0, 0, 0, 0, 0, 0}, + { 0, 2, 5, 0, 0, 0, 0, 0}, + { 1, 2, 5, 0, 0, 0, 0, 0}, + { 0, 1, 2, 5, 0, 0, 0, 0}, + { 3, 5, 0, 0, 0, 0, 0, 0}, + { 0, 3, 5, 0, 0, 0, 0, 0}, + { 1, 3, 5, 0, 0, 0, 0, 0}, + { 0, 1, 3, 5, 0, 0, 0, 0}, + { 2, 3, 5, 0, 0, 0, 0, 0}, + { 0, 2, 3, 5, 0, 0, 0, 0}, + { 1, 2, 3, 5, 0, 0, 0, 0}, + { 0, 1, 2, 3, 5, 0, 0, 0}, + { 4, 5, 0, 0, 0, 0, 0, 0}, + { 0, 4, 5, 0, 0, 0, 0, 0}, + { 1, 4, 5, 0, 0, 0, 0, 0}, + { 0, 1, 4, 5, 0, 0, 0, 0}, + { 2, 4, 5, 0, 0, 0, 0, 0}, + { 0, 2, 4, 5, 0, 0, 0, 0}, + { 1, 2, 4, 5, 0, 0, 0, 0}, + { 0, 1, 2, 4, 5, 0, 0, 0}, + { 3, 4, 5, 0, 0, 0, 0, 0}, + { 0, 3, 4, 5, 0, 0, 0, 0}, + { 1, 3, 4, 5, 0, 0, 0, 0}, + { 0, 1, 3, 4, 5, 0, 0, 0}, + { 2, 3, 4, 5, 0, 0, 0, 0}, + { 0, 2, 3, 4, 5, 0, 0, 0}, + { 1, 2, 3, 4, 5, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 0, 0}, + { 6, 0, 0, 0, 0, 0, 0, 0}, + { 0, 6, 0, 0, 0, 0, 0, 0}, + { 1, 6, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 0, 0, 0, 0, 0}, + { 2, 6, 0, 0, 0, 0, 0, 0}, + { 0, 2, 6, 0, 0, 0, 0, 0}, + { 1, 2, 6, 0, 0, 0, 0, 0}, + { 0, 1, 2, 6, 0, 0, 0, 0}, + { 3, 6, 0, 0, 0, 0, 0, 0}, + { 0, 3, 6, 0, 0, 0, 0, 0}, + { 1, 3, 6, 0, 0, 0, 0, 0}, + { 0, 1, 3, 6, 0, 0, 0, 0}, + { 2, 3, 6, 0, 0, 0, 0, 0}, + { 0, 2, 3, 6, 0, 0, 0, 0}, + { 1, 2, 3, 6, 0, 0, 0, 0}, + { 0, 1, 2, 3, 6, 0, 0, 0}, + { 4, 6, 0, 0, 0, 0, 0, 0}, + { 0, 4, 6, 0, 0, 0, 0, 0}, + { 1, 4, 6, 0, 0, 0, 0, 0}, + { 0, 1, 4, 6, 0, 0, 0, 0}, + { 2, 4, 6, 0, 0, 0, 0, 0}, + { 0, 2, 4, 6, 0, 0, 0, 0}, + { 1, 2, 4, 6, 0, 0, 0, 0}, + { 0, 1, 2, 4, 6, 0, 0, 0}, + { 3, 4, 6, 0, 0, 0, 0, 0}, + { 0, 3, 4, 6, 0, 0, 0, 0}, + { 1, 3, 4, 6, 0, 0, 0, 0}, + { 0, 1, 3, 4, 6, 0, 0, 0}, + { 2, 3, 4, 6, 0, 0, 0, 0}, + { 0, 2, 3, 4, 6, 0, 0, 0}, + { 1, 2, 3, 4, 6, 0, 0, 0}, + { 0, 1, 2, 3, 4, 6, 0, 0}, + { 5, 6, 0, 0, 0, 0, 0, 0}, + { 0, 5, 6, 0, 0, 0, 0, 0}, 
+ { 1, 5, 6, 0, 0, 0, 0, 0}, + { 0, 1, 5, 6, 0, 0, 0, 0}, + { 2, 5, 6, 0, 0, 0, 0, 0}, + { 0, 2, 5, 6, 0, 0, 0, 0}, + { 1, 2, 5, 6, 0, 0, 0, 0}, + { 0, 1, 2, 5, 6, 0, 0, 0}, + { 3, 5, 6, 0, 0, 0, 0, 0}, + { 0, 3, 5, 6, 0, 0, 0, 0}, + { 1, 3, 5, 6, 0, 0, 0, 0}, + { 0, 1, 3, 5, 6, 0, 0, 0}, + { 2, 3, 5, 6, 0, 0, 0, 0}, + { 0, 2, 3, 5, 6, 0, 0, 0}, + { 1, 2, 3, 5, 6, 0, 0, 0}, + { 0, 1, 2, 3, 5, 6, 0, 0}, + { 4, 5, 6, 0, 0, 0, 0, 0}, + { 0, 4, 5, 6, 0, 0, 0, 0}, + { 1, 4, 5, 6, 0, 0, 0, 0}, + { 0, 1, 4, 5, 6, 0, 0, 0}, + { 2, 4, 5, 6, 0, 0, 0, 0}, + { 0, 2, 4, 5, 6, 0, 0, 0}, + { 1, 2, 4, 5, 6, 0, 0, 0}, + { 0, 1, 2, 4, 5, 6, 0, 0}, + { 3, 4, 5, 6, 0, 0, 0, 0}, + { 0, 3, 4, 5, 6, 0, 0, 0}, + { 1, 3, 4, 5, 6, 0, 0, 0}, + { 0, 1, 3, 4, 5, 6, 0, 0}, + { 2, 3, 4, 5, 6, 0, 0, 0}, + { 0, 2, 3, 4, 5, 6, 0, 0}, + { 1, 2, 3, 4, 5, 6, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 0}, + { 7, 0, 0, 0, 0, 0, 0, 0}, + { 0, 7, 0, 0, 0, 0, 0, 0}, + { 1, 7, 0, 0, 0, 0, 0, 0}, + { 0, 1, 7, 0, 0, 0, 0, 0}, + { 2, 7, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 0, 0, 0, 0, 0}, + { 1, 2, 7, 0, 0, 0, 0, 0}, + { 0, 1, 2, 7, 0, 0, 0, 0}, + { 3, 7, 0, 0, 0, 0, 0, 0}, + { 0, 3, 7, 0, 0, 0, 0, 0}, + { 1, 3, 7, 0, 0, 0, 0, 0}, + { 0, 1, 3, 7, 0, 0, 0, 0}, + { 2, 3, 7, 0, 0, 0, 0, 0}, + { 0, 2, 3, 7, 0, 0, 0, 0}, + { 1, 2, 3, 7, 0, 0, 0, 0}, + { 0, 1, 2, 3, 7, 0, 0, 0}, + { 4, 7, 0, 0, 0, 0, 0, 0}, + { 0, 4, 7, 0, 0, 0, 0, 0}, + { 1, 4, 7, 0, 0, 0, 0, 0}, + { 0, 1, 4, 7, 0, 0, 0, 0}, + { 2, 4, 7, 0, 0, 0, 0, 0}, + { 0, 2, 4, 7, 0, 0, 0, 0}, + { 1, 2, 4, 7, 0, 0, 0, 0}, + { 0, 1, 2, 4, 7, 0, 0, 0}, + { 3, 4, 7, 0, 0, 0, 0, 0}, + { 0, 3, 4, 7, 0, 0, 0, 0}, + { 1, 3, 4, 7, 0, 0, 0, 0}, + { 0, 1, 3, 4, 7, 0, 0, 0}, + { 2, 3, 4, 7, 0, 0, 0, 0}, + { 0, 2, 3, 4, 7, 0, 0, 0}, + { 1, 2, 3, 4, 7, 0, 0, 0}, + { 0, 1, 2, 3, 4, 7, 0, 0}, + { 5, 7, 0, 0, 0, 0, 0, 0}, + { 0, 5, 7, 0, 0, 0, 0, 0}, + { 1, 5, 7, 0, 0, 0, 0, 0}, + { 0, 1, 5, 7, 0, 0, 0, 0}, + { 2, 5, 7, 0, 0, 0, 0, 0}, + { 0, 2, 5, 7, 0, 0, 0, 0}, + { 1, 2, 5, 7, 0, 0, 0, 
0}, + { 0, 1, 2, 5, 7, 0, 0, 0}, + { 3, 5, 7, 0, 0, 0, 0, 0}, + { 0, 3, 5, 7, 0, 0, 0, 0}, + { 1, 3, 5, 7, 0, 0, 0, 0}, + { 0, 1, 3, 5, 7, 0, 0, 0}, + { 2, 3, 5, 7, 0, 0, 0, 0}, + { 0, 2, 3, 5, 7, 0, 0, 0}, + { 1, 2, 3, 5, 7, 0, 0, 0}, + { 0, 1, 2, 3, 5, 7, 0, 0}, + { 4, 5, 7, 0, 0, 0, 0, 0}, + { 0, 4, 5, 7, 0, 0, 0, 0}, + { 1, 4, 5, 7, 0, 0, 0, 0}, + { 0, 1, 4, 5, 7, 0, 0, 0}, + { 2, 4, 5, 7, 0, 0, 0, 0}, + { 0, 2, 4, 5, 7, 0, 0, 0}, + { 1, 2, 4, 5, 7, 0, 0, 0}, + { 0, 1, 2, 4, 5, 7, 0, 0}, + { 3, 4, 5, 7, 0, 0, 0, 0}, + { 0, 3, 4, 5, 7, 0, 0, 0}, + { 1, 3, 4, 5, 7, 0, 0, 0}, + { 0, 1, 3, 4, 5, 7, 0, 0}, + { 2, 3, 4, 5, 7, 0, 0, 0}, + { 0, 2, 3, 4, 5, 7, 0, 0}, + { 1, 2, 3, 4, 5, 7, 0, 0}, + { 0, 1, 2, 3, 4, 5, 7, 0}, + { 6, 7, 0, 0, 0, 0, 0, 0}, + { 0, 6, 7, 0, 0, 0, 0, 0}, + { 1, 6, 7, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 0, 0, 0, 0}, + { 2, 6, 7, 0, 0, 0, 0, 0}, + { 0, 2, 6, 7, 0, 0, 0, 0}, + { 1, 2, 6, 7, 0, 0, 0, 0}, + { 0, 1, 2, 6, 7, 0, 0, 0}, + { 3, 6, 7, 0, 0, 0, 0, 0}, + { 0, 3, 6, 7, 0, 0, 0, 0}, + { 1, 3, 6, 7, 0, 0, 0, 0}, + { 0, 1, 3, 6, 7, 0, 0, 0}, + { 2, 3, 6, 7, 0, 0, 0, 0}, + { 0, 2, 3, 6, 7, 0, 0, 0}, + { 1, 2, 3, 6, 7, 0, 0, 0}, + { 0, 1, 2, 3, 6, 7, 0, 0}, + { 4, 6, 7, 0, 0, 0, 0, 0}, + { 0, 4, 6, 7, 0, 0, 0, 0}, + { 1, 4, 6, 7, 0, 0, 0, 0}, + { 0, 1, 4, 6, 7, 0, 0, 0}, + { 2, 4, 6, 7, 0, 0, 0, 0}, + { 0, 2, 4, 6, 7, 0, 0, 0}, + { 1, 2, 4, 6, 7, 0, 0, 0}, + { 0, 1, 2, 4, 6, 7, 0, 0}, + { 3, 4, 6, 7, 0, 0, 0, 0}, + { 0, 3, 4, 6, 7, 0, 0, 0}, + { 1, 3, 4, 6, 7, 0, 0, 0}, + { 0, 1, 3, 4, 6, 7, 0, 0}, + { 2, 3, 4, 6, 7, 0, 0, 0}, + { 0, 2, 3, 4, 6, 7, 0, 0}, + { 1, 2, 3, 4, 6, 7, 0, 0}, + { 0, 1, 2, 3, 4, 6, 7, 0}, + { 5, 6, 7, 0, 0, 0, 0, 0}, + { 0, 5, 6, 7, 0, 0, 0, 0}, + { 1, 5, 6, 7, 0, 0, 0, 0}, + { 0, 1, 5, 6, 7, 0, 0, 0}, + { 2, 5, 6, 7, 0, 0, 0, 0}, + { 0, 2, 5, 6, 7, 0, 0, 0}, + { 1, 2, 5, 6, 7, 0, 0, 0}, + { 0, 1, 2, 5, 6, 7, 0, 0}, + { 3, 5, 6, 7, 0, 0, 0, 0}, + { 0, 3, 5, 6, 7, 0, 0, 0}, + { 1, 3, 5, 6, 7, 0, 0, 0}, + { 0, 1, 3, 5, 6, 7, 
0, 0}, + { 2, 3, 5, 6, 7, 0, 0, 0}, + { 0, 2, 3, 5, 6, 7, 0, 0}, + { 1, 2, 3, 5, 6, 7, 0, 0}, + { 0, 1, 2, 3, 5, 6, 7, 0}, + { 4, 5, 6, 7, 0, 0, 0, 0}, + { 0, 4, 5, 6, 7, 0, 0, 0}, + { 1, 4, 5, 6, 7, 0, 0, 0}, + { 0, 1, 4, 5, 6, 7, 0, 0}, + { 2, 4, 5, 6, 7, 0, 0, 0}, + { 0, 2, 4, 5, 6, 7, 0, 0}, + { 1, 2, 4, 5, 6, 7, 0, 0}, + { 0, 1, 2, 4, 5, 6, 7, 0}, + { 3, 4, 5, 6, 7, 0, 0, 0}, + { 0, 3, 4, 5, 6, 7, 0, 0}, + { 1, 3, 4, 5, 6, 7, 0, 0}, + { 0, 1, 3, 4, 5, 6, 7, 0}, + { 2, 3, 4, 5, 6, 7, 0, 0}, + { 0, 2, 3, 4, 5, 6, 7, 0}, + { 1, 2, 3, 4, 5, 6, 7, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7} +}; + +unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(int32_t *restrict r, const uint8_t buf[REJ_UNIFORM_BUFLEN + 8]) { + unsigned int ctr, pos; + uint32_t good; + __m256i d, tmp; + const __m256i bound = _mm256_set1_epi32(Q); + const __m256i mask = _mm256_set1_epi32(0x7FFFFF); + const __m256i idx8 = _mm256_set_epi8(-1, 15, 14, 13, -1, 12, 11, 10, + -1, 9, 8, 7, -1, 6, 5, 4, + -1, 11, 10, 9, -1, 8, 7, 6, + -1, 5, 4, 3, -1, 2, 1, 0); + + ctr = pos = 0; + while (pos <= REJ_UNIFORM_BUFLEN - 24) { + d = _mm256_loadu_si256((__m256i *)&buf[pos]); + d = _mm256_permute4x64_epi64(d, 0x94); + d = _mm256_shuffle_epi8(d, idx8); + d = _mm256_and_si256(d, mask); + pos += 24; + + tmp = _mm256_sub_epi32(d, bound); + good = _mm256_movemask_ps((__m256)tmp); + tmp = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM2_AVX2_idxlut[good])); + d = _mm256_permutevar8x32_epi32(d, tmp); + + _mm256_storeu_si256((__m256i *)&r[ctr], d); + ctr += _mm_popcnt_u32(good); + + if (ctr > N - 8) { + break; + } + } + + uint32_t t; + while (ctr < N && pos <= REJ_UNIFORM_BUFLEN - 3) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + r[ctr++] = t; + } + } + + return ctr; +} + +unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(int32_t *restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + 
__m256i f0, f1, f2; + __m128i g0, g1; + const __m256i mask = _mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(ETA); + const __m256i bound = mask; + const __m256i v = _mm256_set1_epi32(-6560); + const __m256i p = _mm256_set1_epi32(5); + + ctr = pos = 0; + while (ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0, 4); + f0 = _mm256_or_si256(f0, f1); + f0 = _mm256_and_si256(f0, mask); + + f1 = _mm256_sub_epi8(f0, bound); + f0 = _mm256_sub_epi8(eta, f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM2_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm_bsrli_si128(g0, 8); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM2_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm256_extracti128_si256(f0, 1); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM2_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm_bsrli_si128(g0, 8); + g1 = _mm_loadl_epi64((__m128i 
*)&PQCLEAN_DILITHIUM2_AVX2_idxlut[good]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while (ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if (t0 < 15) { + t0 = t0 - (205 * t0 >> 10) * 5; + r[ctr++] = 2 - t0; + } + if (t1 < 15 && ctr < N) { + t1 = t1 - (205 * t1 >> 10) * 5; + r[ctr++] = 2 - t1; + } + } + + return ctr; +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/rejsample.h b/crypto_sign/dilithium/dilithium2/avx2/rejsample.h new file mode 100644 index 00000000..f7f3cbb3 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/rejsample.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_REJSAMPLE_H +#define PQCLEAN_DILITHIUM2_AVX2_REJSAMPLE_H +#include "params.h" +#include "symmetric.h" +#include + +#define REJ_UNIFORM_NBLOCKS ((768+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_BUFLEN (REJ_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES) + +#define REJ_UNIFORM_ETA_NBLOCKS ((137+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_ETA_BUFLEN (REJ_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) + +extern const uint8_t PQCLEAN_DILITHIUM2_AVX2_idxlut[256][8]; + +unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_uniform_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN + 8]); + +unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_eta_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN]); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/rounding.c b/crypto_sign/dilithium/dilithium2/avx2/rounding.c new file mode 100644 index 00000000..6a13031c --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/rounding.c @@ -0,0 +1,157 @@ +#include "consts.h" +#include "params.h" +#include "rejsample.h" +#include "rounding.h" +#include +#include +#include + +#define 
_mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: power2round +* +* Description: For finite field elements a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be positive standard representative. +* +* Arguments: - __m256i *a1: output array of length N/8 with high bits +* - __m256i *a0: output array of length N/8 with low bits a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void PQCLEAN_DILITHIUM2_AVX2_power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) { + unsigned int i; + __m256i f, f0, f1; + const __m256i mask = _mm256_set1_epi32(-(1 << D)); + const __m256i half = _mm256_set1_epi32((1 << (D - 1)) - 1); + + for (i = 0; i < N / 8; ++i) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f, half); + f0 = _mm256_and_si256(f1, mask); + f1 = _mm256_srli_epi32(f1, D); + f0 = _mm256_sub_epi32(f, f0); + _mm256_store_si256(&a1[i], f1); + _mm256_store_si256(&a0[i], f0); + } +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low parts a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard +* representative. 
/*************************************************
* Name:        PQCLEAN_DILITHIUM2_AVX2_decompose_avx
*
* Description: For finite field element a, compute high and low parts a0, a1 such
*              that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except
*              if a1 = (Q-1)/ALPHA where we set a1 = 0 and
*              -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard
*              representative. ALPHA is 2*GAMMA2 in the code below.
*
* Arguments:   - __m256i *a1: output array of length N/8 with high parts
*              - __m256i *a0: output array of length N/8 with low parts a0
*              - const __m256i *a: input array of length N/8
*
* Each vector of a is loaded before the matching vectors of a1/a0 are
* stored.
**************************************************/
void PQCLEAN_DILITHIUM2_AVX2_decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) {
    unsigned int i;
    __m256i f, f0, f1, t;
    /* presumably eight copies of Q taken from the shared constant table --
     * TODO confirm against consts.h */
    const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM2_AVX2_qdata.vec[_8XQ / 8]);
    const __m256i hq = _mm256_srli_epi32(q, 1);
    const __m256i v = _mm256_set1_epi32(11275);
    const __m256i alpha = _mm256_set1_epi32(2 * GAMMA2);
    const __m256i off = _mm256_set1_epi32(127);
    const __m256i shift = _mm256_set1_epi32(128);
    const __m256i max = _mm256_set1_epi32(43);
    const __m256i zero = _mm256_setzero_si256();

    for (i = 0; i < N / 8; i++) {
        f = _mm256_load_si256(&a[i]);
        /* f1 = ceil(a / 128) */
        f1 = _mm256_add_epi32(f, off);
        f1 = _mm256_srli_epi32(f1, 7);
        /* Two 16-bit multiplies: first the high half of f1 * 11275, then a
         * rounded (x*128 + 2^14) >> 15. Together this looks like
         * (f1*11275 + 2^23) >> 24, i.e. a rounded division yielding a1.
         * NOTE(review): relies on f1 fitting into 16 bits -- confirm the
         * input range. */
        f1 = _mm256_mulhi_epu16(f1, v);
        f1 = _mm256_mulhrs_epi16(f1, shift);
        /* 43 - f1 is negative only where f1 > 43; the blend (which selects
         * on the sign bit of t) resets those lanes to 0. This is the
         * a1 = (Q-1)/ALPHA special case from the banner. */
        t = _mm256_sub_epi32(max, f1);
        f1 = _mm256_blendv_epi32(f1, zero, t);
        /* a0 = a - a1*ALPHA */
        f0 = _mm256_mullo_epi32(f1, alpha);
        f0 = _mm256_sub_epi32(f, f0);
        /* Where a0 exceeds floor(Q/2), subtract Q to centre it. */
        f = _mm256_cmpgt_epi32(f0, hq);
        f = _mm256_and_si256(f, q);
        f0 = _mm256_sub_epi32(f0, f);
        _mm256_store_si256(&a1[i], f1);
        _mm256_store_si256(&a0[i], f0);
    }
}
+* +* Arguments: - uint8_t *hint: hint array +* - const __m256i *a0: low bits of input elements +* - const __m256i *a1: high bits of input elements +* +* Returns number of overflowing low bits +**************************************************/ +unsigned int PQCLEAN_DILITHIUM2_AVX2_make_hint_avx(uint8_t hint[N], const __m256i *restrict a0, const __m256i *restrict a1) { + unsigned int i, n = 0; + __m256i f0, f1, g0, g1; + uint32_t bad; + uint64_t idx; + const __m256i low = _mm256_set1_epi32(-GAMMA2); + const __m256i high = _mm256_set1_epi32(GAMMA2); + + for (i = 0; i < N / 8; ++i) { + f0 = _mm256_load_si256(&a0[i]); + f1 = _mm256_load_si256(&a1[i]); + g0 = _mm256_abs_epi32(f0); + g0 = _mm256_cmpgt_epi32(g0, high); + g1 = _mm256_cmpeq_epi32(f0, low); + g1 = _mm256_sign_epi32(g1, f1); + g0 = _mm256_or_si256(g0, g1); + + bad = _mm256_movemask_ps((__m256)g0); + memcpy(&idx, PQCLEAN_DILITHIUM2_AVX2_idxlut[bad], 8); + idx += (uint64_t)0x0808080808080808 * i; + memcpy(&hint[n], &idx, 8); + n += _mm_popcnt_u32(bad); + } + + return n; +} + +/************************************************* +* Name: use_hint +* +* Description: Correct high parts according to hint. 
/*************************************************
* Name:        PQCLEAN_DILITHIUM2_AVX2_use_hint_avx
*
* Description: Correct high parts according to hint. The input is split with
*              decompose_avx; where the hint is set, the high part is moved
*              by one step in the direction given by the sign of the low
*              part, wrapping around between 0 and 43.
*
* Arguments:   - __m256i *b: output array of length N/8 with corrected high parts
*              - const __m256i *a: input array of length N/8
*              - const __m256i *hint: input array of length N/8 with hint bits
*
**************************************************/
void PQCLEAN_DILITHIUM2_AVX2_use_hint_avx(__m256i *b, const __m256i *a, const __m256i *restrict hint) {
    unsigned int i;
    /* scratch for the low parts produced by decompose_avx */
    __m256i a0[N / 8];
    __m256i f, g, h, t;
    const __m256i zero = _mm256_setzero_si256();
    const __m256i max = _mm256_set1_epi32(43);

    /* b receives the uncorrected high parts, a0 the low parts */
    PQCLEAN_DILITHIUM2_AVX2_decompose_avx(b, a0, a);
    for (i = 0; i < N / 8; i++) {
        f = _mm256_load_si256(&a0[i]);
        g = _mm256_load_si256(&b[i]);
        h = _mm256_load_si256(&hint[i]);
        /* t = h where the low part is negative (blend selects on the sign
         * bit of f), 0 elsewhere; h - 2t is then -h or +h respectively. */
        t = _mm256_blendv_epi32(zero, h, f);
        t = _mm256_slli_epi32(t, 1);
        h = _mm256_sub_epi32(h, t);
        g = _mm256_add_epi32(g, h);
        /* wrap around: a negative result becomes 43 ... */
        g = _mm256_blendv_epi32(g, max, g);
        /* ... and a result above 43 becomes 0 */
        f = _mm256_cmpgt_epi32(g, max);
        g = _mm256_blendv_epi32(g, zero, f);
        _mm256_store_si256(&b[i], g);
    }
}
"cdecl.h" +.include "shuffle.inc" + +.text +nttunpack128_avx: +#load +vmovdqa (%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +#store +vmovdqa %ymm9,(%rdi) +vmovdqa %ymm8,32(%rdi) +vmovdqa %ymm7,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm5,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm3,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(PQCLEAN_DILITHIUM2_AVX2_nttunpack_avx) +.global _cdecl(PQCLEAN_DILITHIUM2_AVX2_nttunpack_avx) +cdecl(PQCLEAN_DILITHIUM2_AVX2_nttunpack_avx): +_cdecl(PQCLEAN_DILITHIUM2_AVX2_nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret diff --git a/crypto_sign/dilithium/dilithium2/avx2/shuffle.inc b/crypto_sign/dilithium/dilithium2/avx2/shuffle.inc new file mode 100644 index 00000000..73e9ffe0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/crypto_sign/dilithium/dilithium2/avx2/sign.c 
b/crypto_sign/dilithium/dilithium2/avx2/sign.c new file mode 100644 index 00000000..5dcf7261 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/sign.c @@ -0,0 +1,415 @@ +#include "align.h" +#include "fips202.h" +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "sign.h" +#include "symmetric.h" +#include +#include + +static inline void polyvec_matrix_expand_row(polyvecl **row, polyvecl buf[2], const uint8_t rho[SEEDBYTES], unsigned int i) { + switch (i) { + case 0: + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row0(buf, buf + 1, rho); + *row = buf; + break; + case 1: + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row1(buf + 1, buf, rho); + *row = buf + 1; + break; + case 2: + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row2(buf, buf + 1, rho); + *row = buf; + break; + case 3: + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand_row3(buf + 1, buf, rho); + *row = buf + 1; + break; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair +* +* Description: Generates public and private key. 
/*************************************************
* Name:        PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair
*
* Description: Generates public and private key.
*              Public key layout as written here:  rho | packed t1.
*              Secret key layout as written here:  rho | key | CRH(pk) |
*              packed s1 | packed s2 | packed t0.
*
* Arguments:   - uint8_t *pk: pointer to output public key (allocated
*                             array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES bytes)
*              - uint8_t *sk: pointer to output private key (allocated
*                             array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES bytes)
*
* Returns 0 (success)
**************************************************/
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) {
    unsigned int i;
    uint8_t seedbuf[3 * SEEDBYTES];
    const uint8_t *rho, *rhoprime, *key;
    polyvecl rowbuf[2];
    polyvecl s1, *row = rowbuf;
    polyveck s2;
    poly t1, t0;

    /* Get randomness for rho, rhoprime and key: SEEDBYTES of fresh
     * randomness are expanded in place to three seeds with SHAKE256. */
    randombytes(seedbuf, SEEDBYTES);
    shake256(seedbuf, 3 * SEEDBYTES, seedbuf, SEEDBYTES);
    rho = seedbuf;
    rhoprime = seedbuf + SEEDBYTES;
    key = seedbuf + 2 * SEEDBYTES;

    /* Store rho, key */
    memcpy(pk, rho, SEEDBYTES);
    memcpy(sk, rho, SEEDBYTES);
    memcpy(sk + SEEDBYTES, key, SEEDBYTES);

    /* Sample short vectors s1 and s2; the two calls use nonces 0..3 and
     * 4..7 and fill exactly four polynomials each. */
    PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3);
    PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(&s2.vec[0], &s2.vec[1], &s2.vec[2], &s2.vec[3], rhoprime, 4, 5, 6, 7);

    /* Pack secret vectors; they start after rho, key and the CRHBYTES slot
     * that is filled in at the end of this function. */
    for (i = 0; i < L; i++) {
        PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(sk + 2 * SEEDBYTES + CRHBYTES + i * POLYETA_PACKEDBYTES, &s1.vec[i]);
    }
    for (i = 0; i < K; i++) {
        PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(sk + 2 * SEEDBYTES + CRHBYTES + (L + i)*POLYETA_PACKEDBYTES, &s2.vec[i]);
    }

    /* Transform s1 */
    PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&s1);


    /* One row of t = A*s1 + s2 at a time, so only one matrix row (plus the
     * helper's second scratch element) is held in memory. */
    for (i = 0; i < K; i++) {
        /* Expand matrix row */
        polyvec_matrix_expand_row(&row, rowbuf, rho, i);

        /* Compute inner-product */
        PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_montgomery(&t1, row, &s1);
        PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&t1);

        /* Add error polynomial */
        PQCLEAN_DILITHIUM2_AVX2_poly_add(&t1, &t1, &s2.vec[i]);

        /* Round t and pack t1, t0: t1 goes into the public key, t0 into the
         * secret key behind the packed s1 and s2. */
        PQCLEAN_DILITHIUM2_AVX2_poly_caddq(&t1);
        PQCLEAN_DILITHIUM2_AVX2_poly_power2round(&t1, &t0, &t1);
        PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(pk + SEEDBYTES + i * POLYT1_PACKEDBYTES, &t1);
        PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(sk + 2 * SEEDBYTES + CRHBYTES + (L + K)*POLYETA_PACKEDBYTES + i * POLYT0_PACKEDBYTES, &t0);
    }

    /* Compute CRH(rho, t1) and store in secret key */
    crh(sk + 2 * SEEDBYTES, pk, PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES);

    /* NOTE(review): seedbuf, s1, s2 and t0 hold secret material and are not
     * wiped before returning -- confirm this is acceptable for the project. */
    return 0;
}
matrix and transform vectors */ + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_expand(mat, rho); + PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&s1); + PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(&s2); + PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(&t0); + + +rej: + /* Sample intermediate vector y */ + PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + nonce += 4; + + /* Matrix-vector product */ + tmpv.y = z; + PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&tmpv.y); + PQCLEAN_DILITHIUM2_AVX2_polyvec_matrix_pointwise_montgomery(&w1, mat, &tmpv.y); + PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + PQCLEAN_DILITHIUM2_AVX2_polyveck_caddq(&w1); + PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose(&w1, &tmpv.w0, &w1); + PQCLEAN_DILITHIUM2_AVX2_polyveck_pack_w1(sig, &w1); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + PQCLEAN_DILITHIUM2_AVX2_poly_challenge(&c, sig); + PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&c); + + /* Compute z, reject if it reveals secret */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(&tmp, &c, &s1.vec[i]); + PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM2_AVX2_poly_add(&z.vec[i], &z.vec[i], &tmp); + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&z.vec[i]); + if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&z.vec[i], GAMMA1 - BETA)) { + goto rej; + } + } + + /* Zero hint vector in signature */ + pos = 0; + memset(hint, 0, OMEGA); + + for (i = 0; i < K; i++) { + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(&tmp, &c, &s2.vec[i]); + PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&tmp); + 
PQCLEAN_DILITHIUM2_AVX2_poly_sub(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&tmpv.w0.vec[i]); + if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&tmpv.w0.vec[i], GAMMA2 - BETA)) { + goto rej; + } + + /* Compute hints */ + PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(&tmp, &c, &t0.vec[i]); + PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&tmp); + if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&tmp, GAMMA2)) { + goto rej; + } + + PQCLEAN_DILITHIUM2_AVX2_poly_add(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + n = PQCLEAN_DILITHIUM2_AVX2_poly_make_hint(hintbuf, &tmpv.w0.vec[i], &w1.vec[i]); + if (pos + n > OMEGA) { + goto rej; + } + + /* Store hints in signature */ + memcpy(&hint[pos], hintbuf, n); + hint[OMEGA + i] = pos = pos + n; + } + + /* Pack z into signature */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM2_AVX2_polyz_pack(sig + SEEDBYTES + i * POLYZ_PACKEDBYTES, &z.vec[i]); + } + + *siglen = PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t i; + + for (i = 0; i < mlen; ++i) { + sm[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + } + PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature(sm, smlen, sm + PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) { + unsigned int i, j, pos = 0; + /* PQCLEAN_DILITHIUM2_AVX2_polyw1_pack writes additional 14 bytes */ + ALIGNED_UINT8(K * POLYW1_PACKEDBYTES + 14) buf; + uint8_t mu[CRHBYTES]; + const uint8_t *hint = sig + SEEDBYTES + L * POLYZ_PACKEDBYTES; + polyvecl rowbuf[2]; + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; + shake256incctx state; + + if (siglen != PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES) { + return -1; + } + + /* Compute CRH(CRH(rho, t1), msg) */ + crh(mu, pk, 
PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + /* Expand PQCLEAN_DILITHIUM2_AVX2_challenge */ + PQCLEAN_DILITHIUM2_AVX2_poly_challenge(&c, sig); + PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&c); + + /* Unpack z; shortness follows from unpacking */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(&z.vec[i], sig + SEEDBYTES + i * POLYZ_PACKEDBYTES); + PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&z.vec[i]); + } + + + for (i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, pk, i); + + /* Compute i-th row of Az - c2^Dt1 */ + PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_montgomery(&w1, row, &z); + + PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(&h, pk + SEEDBYTES + i * POLYT1_PACKEDBYTES); + PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(&h); + PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&h); + PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_montgomery(&h, &c, &h); + + PQCLEAN_DILITHIUM2_AVX2_poly_sub(&w1, &w1, &h); + PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&w1); + PQCLEAN_DILITHIUM2_AVX2_poly_invntt_tomont(&w1); + + /* Get hint polynomial and reconstruct w1 */ + memset(h.vec, 0, sizeof(poly)); + if (hint[OMEGA + i] < pos || hint[OMEGA + i] > OMEGA) { + return -1; + } + + for (j = pos; j < hint[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > pos && hint[j] <= hint[j - 1]) { + return -1; + } + h.coeffs[hint[j]] = 1; + } + pos = hint[OMEGA + i]; + + PQCLEAN_DILITHIUM2_AVX2_poly_caddq(&w1); + PQCLEAN_DILITHIUM2_AVX2_poly_use_hint(&w1, &w1, &h); + PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(buf.coeffs + i * POLYW1_PACKEDBYTES, &w1); + } + + /* Extra indices are zero for strong unforgeability */ + for (j = pos; j < OMEGA; ++j) { + if (hint[j]) { + return -1; + } + } + + /* Call random oracle and verify 
PQCLEAN_DILITHIUM2_AVX2_challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf.coeffs, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + for (i = 0; i < SEEDBYTES; ++i) { + if (buf.coeffs[i] != sig[i]) { + return -1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open +* +* Description: Verify signed message. +* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk) { + size_t i; + + if (smlen < PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES) { + goto badsig; + } + + *mlen = smlen - PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES; + if (PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify(sm, PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES, sm + PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES, *mlen, pk)) { + goto badsig; + } else { + /* All good, copy msg, return 0 */ + for (i = 0; i < *mlen; ++i) { + m[i] = sm[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES + i]; + } + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for (i = 0; i < smlen; ++i) { + m[i] = 0; + } + + return -1; +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/sign.h b/crypto_sign/dilithium/dilithium2/avx2/sign.h new file mode 100644 index 00000000..94dc98b1 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/sign.h @@ -0,0 +1,29 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_SIGN_H 
+#define PQCLEAN_DILITHIUM2_AVX2_SIGN_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include +#include + +void PQCLEAN_DILITHIUM2_AVX2_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/avx2/symmetric-shake.c b/crypto_sign/dilithium/dilithium2/avx2/symmetric-shake.c new file mode 100644 index 00000000..803b8121 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/symmetric-shake.c @@ -0,0 +1,26 @@ +#include "fips202.h" +#include "params.h" +#include "symmetric.h" +#include + +void PQCLEAN_DILITHIUM2_AVX2_dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void PQCLEAN_DILITHIUM2_AVX2_dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/crypto_sign/dilithium/dilithium2/avx2/symmetric.h b/crypto_sign/dilithium/dilithium2/avx2/symmetric.h 
new file mode 100644 index 00000000..deb8cf6c --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/avx2/symmetric.h @@ -0,0 +1,36 @@ +#ifndef PQCLEAN_DILITHIUM2_AVX2_SYMMETRIC_H +#define PQCLEAN_DILITHIUM2_AVX2_SYMMETRIC_H +#include "fips202.h" +#include "params.h" +#include + + + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +void PQCLEAN_DILITHIUM2_AVX2_dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +void PQCLEAN_DILITHIUM2_AVX2_dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) +#define stream128_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM2_AVX2_dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE128_RATE), STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM2_AVX2_dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE256_RATE), STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/LICENSE b/crypto_sign/dilithium/dilithium2/clean/LICENSE new file mode 100644 index 00000000..08473af7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/LICENSE @@ -0,0 +1,5 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/crypto_sign/dilithium/dilithium2/clean/Makefile.Microsoft_nmake b/crypto_sign/dilithium/dilithium2/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..410bd6ac --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/Makefile.Microsoft_nmake @@ -0,0 +1,23 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libdilithium2_clean.lib +OBJECTS=ntt.obj packing.obj poly.obj polyvec.obj reduce.obj rounding.obj sign.obj symmetric-shake.obj + +# Warning C4146 is raised when a unary minus operator is applied to an +# unsigned type; this has nonetheless been standard and portable for as +# long as there has been a C standard, and we need it for constant-time +# computations. Thus, we disable that spurious warning. +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /wd4146 + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/dilithium/dilithium2/clean/api.h b/crypto_sign/dilithium/dilithium2/clean/api.h new file mode 100644 index 00000000..dabeeae8 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/api.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_API_H +#define PQCLEAN_DILITHIUM2_CLEAN_API_H + +#include +#include + +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES 1312 +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES 2544 +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES 2420 +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_ALGNAME "Dilithium2" + + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, 
const uint8_t *pk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/ntt.c b/crypto_sign/dilithium/dilithium2/clean/ntt.c new file mode 100644 index 00000000..af3c776b --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/ntt.c @@ -0,0 +1,98 @@ +#include "ntt.h" +#include "params.h" +#include "reduce.h" +#include + +static const int32_t zetas[N] = { + 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, + 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, + -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, + -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, + 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, + -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, + -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, + -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, + 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, + 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, + 2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, + 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, + 
900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, + -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, + 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, + 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, + -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, + -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, + -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, + -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, + -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, + -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, + -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, + -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, + -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, + -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 +}; + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_ntt +* +* Description: Forward NTT, in-place. No modular reduction is performed after +* additions or subtractions. Output vector is in bitreversed order. 
+* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_ntt(int32_t a[N]) { + unsigned int len, start, j, k; + int32_t zeta, t; + + k = 0; + for (len = 128; len > 0; len >>= 1) { + for (start = 0; start < N; start = j + len) { + zeta = zetas[++k]; + for (j = start; j < start + len; ++j) { + t = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((int64_t)zeta * a[j + len]); + a[j + len] = a[j] - t; + a[j] = a[j] + t; + } + } + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_invntt_tomont +* +* Description: Inverse NTT and multiplication by Montgomery factor 2^32. +* In-place. No modular reductions after additions or +* subtractions; input coefficients need to be smaller than +* Q in absolute value. Output coefficient are smaller than Q in +* absolute value. +* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_invntt_tomont(int32_t a[N]) { + unsigned int start, len, j, k; + int32_t t, zeta; + const int32_t f = 41978; // mont^2/256 + + k = 256; + for (len = 1; len < N; len <<= 1) { + for (start = 0; start < N; start = j + len) { + zeta = -zetas[--k]; + for (j = start; j < start + len; ++j) { + t = a[j]; + a[j] = t + a[j + len]; + a[j + len] = t - a[j + len]; + a[j + len] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((int64_t)zeta * a[j + len]); + } + } + } + + for (j = 0; j < N; ++j) { + a[j] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((int64_t)f * a[j]); + } +} diff --git a/crypto_sign/dilithium/dilithium2/clean/ntt.h b/crypto_sign/dilithium/dilithium2/clean/ntt.h new file mode 100644 index 00000000..3b0ff001 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/ntt.h @@ -0,0 +1,10 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_NTT_H +#define PQCLEAN_DILITHIUM2_CLEAN_NTT_H +#include "params.h" +#include + +void PQCLEAN_DILITHIUM2_CLEAN_ntt(int32_t 
a[N]); + +void PQCLEAN_DILITHIUM2_CLEAN_invntt_tomont(int32_t a[N]); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/packing.c b/crypto_sign/dilithium/dilithium2/clean/packing.c new file mode 100644 index 00000000..b54c9646 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/packing.c @@ -0,0 +1,261 @@ +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" + + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + pk[i] = rho[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(pk + i * POLYT1_PACKEDBYTES, &t1->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_unpack_pk +* +* Description: Unpack public key pk = (rho, t1). 
+* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const polyveck *t1: pointer to output vector t1 +* - uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = pk[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(&t1->vec[i], pk + i * POLYT1_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_pack_sk +* +* Description: Bit-pack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = rho[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = key[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + sk[i] = tr[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s1->vec[i]); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s2->vec[i]); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; 
++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(sk + i * POLYT0_PACKEDBYTES, &t0->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const uint8_t tr[]: output byte array for tr +* - const uint8_t key[]: output byte array for key +* - const polyveck *t0: pointer to output vector t0 +* - const polyvecl *s1: pointer to output vector s1 +* - const polyveck *s2: pointer to output vector s2 +* - uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + key[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + tr[i] = sk[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(&s1->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(&s2->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(&t0->vec[i], sk + i * POLYT0_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_pack_sig +* +* Description: Bit-pack signature sig = (c, z, h). 
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to PQCLEAN_DILITHIUM2_CLEAN_challenge hash length SEEDBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES], + const uint8_t c[SEEDBYTES], + const polyvecl *z, + const polyveck *h) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + sig[i] = c[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Encode h */ + for (i = 0; i < OMEGA + K; ++i) { + sig[i] = 0; + } + + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + if (h->vec[i].coeffs[j] != 0) { + sig[k++] = (uint8_t) j; + } + } + + sig[OMEGA + i] = (uint8_t) k; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output PQCLEAN_DILITHIUM2_CLEAN_challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0. 
+**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig(uint8_t c[SEEDBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES]) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + c[i] = sig[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + h->vec[i].coeffs[j] = 0; + } + + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { + return 1; + } + + for (j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) { + return 1; + } + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (j = k; j < OMEGA; ++j) { + if (sig[j]) { + return 1; + } + } + + return 0; +} diff --git a/crypto_sign/dilithium/dilithium2/clean/packing.h b/crypto_sign/dilithium/dilithium2/clean/packing.h new file mode 100644 index 00000000..44a088fd --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/packing.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_PACKING_H +#define PQCLEAN_DILITHIUM2_CLEAN_PACKING_H +#include "params.h" +#include "polyvec.h" +#include + +void PQCLEAN_DILITHIUM2_CLEAN_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +void PQCLEAN_DILITHIUM2_CLEAN_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); + +void 
PQCLEAN_DILITHIUM2_CLEAN_unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES]); + +void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES]); + +int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES]); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/params.h b/crypto_sign/dilithium/dilithium2/clean/params.h new file mode 100644 index 00000000..93aa7897 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/params.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_PARAMS_H +#define PQCLEAN_DILITHIUM2_CLEAN_PARAMS_H + + + +#define SEEDBYTES 32 +#define CRHBYTES 48 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#define K 4 +#define L 4 +#define ETA 2 +#define TAU 39 +#define BETA 78 +#define GAMMA1 (1 << 17) +#define GAMMA2 ((Q-1)/88) +#define OMEGA 80 +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_ALGNAME "Dilithium2" + + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#define POLYZ_PACKEDBYTES 576 + +#define POLYW1_PACKEDBYTES 192 + +#define POLYETA_PACKEDBYTES 96 + +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + CRHBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES (SEEDBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/poly.c b/crypto_sign/dilithium/dilithium2/clean/poly.c new file mode 100644 index 00000000..f46027a5 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/poly.c @@ 
-0,0 +1,867 @@ +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" +#include "rounding.h" +#include "symmetric.h" +#include + +#define DBENCH_START() +#define DBENCH_STOP(t) + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_reduce(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_reduce32(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_caddq(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_caddq(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_freeze +* +* Description: Inplace reduction of all coefficients of polynomial to +* standard representatives. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_freeze(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_freeze(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_add +* +* Description: Add polynomials. No modular reduction is performed. 
+* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_sub +* +* Description: Subtract polynomials. No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtraced from first input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_shiftl(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] <<= D; + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by +* 8*Q in absolute value. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_ntt(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_CLEAN_ntt(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_invntt_tomont(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM2_CLEAN_invntt_tomont(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a1->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_power2round(&a0->coeffs[i], a->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a1->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_decompose(&a0->coeffs[i], a->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_poly_make_hint +* +* Description: Compute hint polynomial. The coefficients of which indicate +* whether the low bits of the corresponding coefficient of +* the input polynomial overflow into the high bits. +* +* Arguments: - poly *h: pointer to output hint polynomial +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of 1 bits. 
+**************************************************/
+unsigned int PQCLEAN_DILITHIUM2_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1) {
+    unsigned int i, s = 0;
+    DBENCH_START();
+
+    for (i = 0; i < N; ++i) {
+        h->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_make_hint(a0->coeffs[i], a1->coeffs[i]);
+        s += h->coeffs[i]; /* running total of the hint coefficients, returned to the caller */
+    }
+
+    DBENCH_STOP(*tround);
+    return s;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint
+*
+* Description: Use hint polynomial to correct the high bits of a polynomial.
+*              Coefficient i of b is computed from coefficient i of a and h only.
+*
+* Arguments:   - poly *b: pointer to output polynomial with corrected high bits
+*              - const poly *a: pointer to input polynomial
+*              - const poly *h: pointer to input hint polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(poly *b, const poly *a, const poly *h) {
+    unsigned int i;
+    DBENCH_START();
+
+    for (i = 0; i < N; ++i) {
+        b->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_use_hint(a->coeffs[i], h->coeffs[i]);
+    }
+
+    DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm
+*
+* Description: Check infinity norm of polynomial against given bound.
+*              Assumes input coefficients were reduced by PQCLEAN_DILITHIUM2_CLEAN_reduce32().
+*
+* Arguments:   - const poly *a: pointer to polynomial
+*              - int32_t B: norm bound
+*
+* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, int32_t B) {
+    unsigned int i;
+    int32_t t;
+    DBENCH_START();
+    /* A bound above (Q-1)/8 is reported as a violation without inspecting any coefficient. */
+    if (B > (Q - 1) / 8) {
+        return 1;
+    }
+
+    /* It is ok to leak which coefficient violates the bound since
+       the probability for each coefficient is independent of secret
+       data but we must not leak the sign of the centralized representative. */
+    for (i = 0; i < N; ++i) {
+        /* Absolute value, computed without branching on the sign */
+        t = a->coeffs[i] >> 31; /* all-ones mask for a negative coefficient, 0 otherwise (relies on an arithmetic right shift) */
+        t = a->coeffs[i] - (t & 2 * a->coeffs[i]); /* x - 2x = -x when negative, x - 0 = x otherwise */
+
+        if (t >= B) {
+            DBENCH_STOP(*tsample);
+            return 1;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return 0;
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Sample uniformly random coefficients in [0, Q-1] by
+*              performing rejection sampling on array of random bytes.
+*              Every candidate consumes 3 bytes of buf, whether accepted or not.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_uniform(int32_t *a,
+                                unsigned int len,
+                                const uint8_t *buf,
+                                unsigned int buflen) {
+    unsigned int ctr, pos;
+    uint32_t t;
+    DBENCH_START();
+
+    ctr = pos = 0;
+    while (ctr < len && pos + 3 <= buflen) {
+        t  = buf[pos++];
+        t |= (uint32_t)buf[pos++] << 8;
+        t |= (uint32_t)buf[pos++] << 16;
+        t &= 0x7FFFFF; /* keep the low 23 bits of the 3-byte little-endian value */
+        /* Accept the candidate only if it is below Q; otherwise it is discarded. */
+        if (t < Q) {
+            a[ctr++] = t;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return ctr;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_poly_uniform
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [0,Q-1] by performing rejection sampling on the
+*              output stream of SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#define POLY_UNIFORM_NBLOCKS ((768 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES) /* ceil(768 / STREAM128_BLOCKBYTES) */
+void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce) {
+    unsigned int i, ctr, off;
+    unsigned int buflen = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES;
+    uint8_t buf[POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES + 2]; /* +2: slack for the up-to-two carried-over bytes placed ahead of a refill block */
+    stream128_state state;
+
+    stream128_init(&state, seed, nonce);
+    stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
+
+    ctr = rej_uniform(a->coeffs, N, buf, buflen);
+    /* Too many rejections: keep squeezing one block at a time until all N coefficients are set. */
+    while (ctr < N) {
+        off = buflen % 3; /* trailing bytes that did not form a whole 3-byte candidate */
+        for (i = 0; i < off; ++i) {
+            buf[i] = buf[buflen - off + i];
+        }
+
+        stream128_squeezeblocks(buf + off, 1, &state);
+        buflen = STREAM128_BLOCKBYTES + off;
+        ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen);
+    }
+    stream128_release(&state);
+}
+
+/*************************************************
+* Name:        rej_eta
+*
+* Description: Sample uniformly random coefficients in [-ETA, ETA] by
+*              performing rejection sampling on array of random bytes.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_eta(int32_t *a,
+                            unsigned int len,
+                            const uint8_t *buf,
+                            unsigned int buflen) {
+    unsigned int ctr, pos;
+    uint32_t t0, t1;
+    DBENCH_START();
+    /* Each byte supplies two 4-bit candidates: the low nibble first, then the high nibble. */
+    ctr = pos = 0;
+    while (ctr < len && pos < buflen) {
+        t0 = buf[pos] & 0x0F;
+        t1 = buf[pos++] >> 4;
+        /* The value 15 is rejected so the 15 remaining values split evenly over 5 residues. */
+        if (t0 < 15) {
+            t0 = t0 - (205 * t0 >> 10) * 5; /* t0 mod 5 for t0 < 15 (205/1024 approximates 1/5) */
+            a[ctr++] = 2 - t0; /* residues 0..4 become 2..-2, so this code is specific to ETA = 2 */
+        }
+        if (t1 < 15 && ctr < len) {
+            t1 = t1 - (205 * t1 >> 10) * 5; /* same reduction for the high nibble */
+            a[ctr++] = 2 - t1;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return ctr;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-ETA,ETA] by performing rejection sampling on the
+*              output stream from SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#define POLY_UNIFORM_ETA_NBLOCKS ((136 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES) /* ceil(136 / STREAM128_BLOCKBYTES) */
+void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce) {
+    unsigned int ctr;
+    unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS * STREAM128_BLOCKBYTES;
+    uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS * STREAM128_BLOCKBYTES];
+    stream128_state state;
+
+    stream128_init(&state, seed, nonce);
+    stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
+
+    ctr = rej_eta(a->coeffs, N, buf, buflen);
+    /* Too many rejections: squeeze one more block at a time until all N coefficients are set. */
+    while (ctr < N) {
+        stream128_squeezeblocks(buf, 1, &state);
+        ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM128_BLOCKBYTES);
+    }
+    stream128_release(&state);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream
+*              of SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 16-bit nonce
+**************************************************/
+#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) /* ceil(POLYZ_PACKEDBYTES / STREAM256_BLOCKBYTES) */
+void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1(poly *a,
+        const uint8_t seed[CRHBYTES],
+        uint16_t nonce) {
+    uint8_t buf[POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES];
+    stream256_state state;
+    /* No rejection loop here: the squeezed bytes are unpacked directly as a packed z polynomial. */
+    stream256_init(&state, seed, nonce);
+    stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
+    stream256_release(&state);
+    PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(a, buf);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_poly_challenge
+*
+* Description: Implementation of H. Samples polynomial with TAU nonzero
+*              coefficients in {-1,1} using the output stream of
+*              SHAKE256(seed).
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const uint8_t seed[]: byte array containing seed of length SEEDBYTES
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) {
+    unsigned int i, b, pos;
+    uint64_t signs;
+    uint8_t buf[SHAKE256_RATE];
+    shake256incctx state;
+
+    shake256_inc_init(&state);
+    shake256_inc_absorb(&state, seed, SEEDBYTES);
+    shake256_inc_finalize(&state);
+    shake256_inc_squeeze(buf, sizeof buf, &state);
+    /* The first 8 output bytes are collected, least significant byte first, into 64 sign bits. */
+    signs = 0;
+    for (i = 0; i < 8; ++i) {
+        signs |= (uint64_t)buf[i] << 8 * i;
+    }
+    pos = 8;
+    /* Start from the all-zero polynomial. */
+    for (i = 0; i < N; ++i) {
+        c->coeffs[i] = 0;
+    }
+    for (i = N - TAU; i < N; ++i) {
+        do { /* rejection-sample an index b <= i, refilling buf when it runs out */
+            if (pos >= SHAKE256_RATE) {
+                shake256_inc_squeeze(buf, sizeof buf, &state);
+                pos = 0;
+            }
+
+            b = buf[pos++];
+        } while (b > i);
+        /* Move the value at b to position i, then set position b to +1 or -1 from the next sign bit. */
+        c->coeffs[i] = c->coeffs[b];
+        c->coeffs[b] = 1 - 2 * (signs & 1);
+        signs >>= 1;
+    }
+    shake256_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack
+*
+* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYETA_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(uint8_t *r, const poly *a) {
+    unsigned int i;
+    uint8_t t[8];
+    DBENCH_START();
+    /* Each iteration packs 8 coefficients into 3 bytes, i.e. 3 bits per coefficient. */
+    for (i = 0; i < N / 8; ++i) {
+        t[0] = (uint8_t) (ETA - a->coeffs[8 * i + 0]);
+        t[1] = (uint8_t) (ETA - a->coeffs[8 * i + 1]);
+        t[2] = (uint8_t) (ETA - a->coeffs[8 * i + 2]);
+        t[3] = (uint8_t) (ETA - a->coeffs[8 * i + 3]);
+        t[4] = (uint8_t) (ETA - a->coeffs[8 * i + 4]);
+        t[5] = (uint8_t) (ETA - a->coeffs[8 * i + 5]);
+        t[6] = (uint8_t) (ETA - a->coeffs[8 * i + 6]);
+        t[7] = (uint8_t) (ETA - a->coeffs[8 * i + 7]);
+        /* t[k] = ETA - coefficient is what gets stored; t[0] lands in the lowest bits of the first byte. */
+        r[3 * i + 0]  = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6);
+        r[3 * i + 1]  = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7);
+        r[3 * i + 2]  = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack
+*
+* Description: Unpack polynomial with coefficients in [-ETA,ETA].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(poly *r, const uint8_t *a) {
+    unsigned int i;
+    DBENCH_START();
+    /* Each iteration expands 3 bytes into 8 coefficients; the mask 7 isolates one 3-bit field. */
+    for (i = 0; i < N / 8; ++i) {
+        r->coeffs[8 * i + 0] =  (a[3 * i + 0] >> 0) & 7;
+        r->coeffs[8 * i + 1] =  (a[3 * i + 0] >> 3) & 7;
+        r->coeffs[8 * i + 2] = ((a[3 * i + 0] >> 6) | (a[3 * i + 1] << 2)) & 7;
+        r->coeffs[8 * i + 3] =  (a[3 * i + 1] >> 1) & 7;
+        r->coeffs[8 * i + 4] =  (a[3 * i + 1] >> 4) & 7;
+        r->coeffs[8 * i + 5] = ((a[3 * i + 1] >> 7) | (a[3 * i + 2] << 1)) & 7;
+        r->coeffs[8 * i + 6] =  (a[3 * i + 2] >> 2) & 7;
+        r->coeffs[8 * i + 7] =  (a[3 * i + 2] >> 5) & 7;
+        /* Map each stored value x back to the coefficient ETA - x. */
+        r->coeffs[8 * i + 0] = ETA - r->coeffs[8 * i + 0];
+        r->coeffs[8 * i + 1] = ETA - r->coeffs[8 * i + 1];
+        r->coeffs[8 * i + 2] = ETA - r->coeffs[8 * i + 2];
+        r->coeffs[8 * i + 3] = ETA - r->coeffs[8 * i + 3];
+        r->coeffs[8 * i + 4] = ETA - r->coeffs[8 * i + 4];
+        r->coeffs[8 * i + 5] = ETA - r->coeffs[8 * i + 5];
+        r->coeffs[8 * i + 6] = ETA - r->coeffs[8 * i + 6];
+        r->coeffs[8 * i + 7] = ETA - r->coeffs[8 * i + 7];
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack
+*
+* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits.
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(uint8_t *r, const poly *a) {
+    unsigned int i;
+    DBENCH_START();
+    /* Four 10-bit coefficients are packed into five bytes per iteration. */
+    for (i = 0; i < N / 4; ++i) {
+        r[5 * i + 0] = (uint8_t) (a->coeffs[4 * i + 0] >> 0);
+        r[5 * i + 1] = (uint8_t) ((a->coeffs[4 * i + 0] >> 8) | (a->coeffs[4 * i + 1] << 2));
+        r[5 * i + 2] = (uint8_t) ((a->coeffs[4 * i + 1] >> 6) | (a->coeffs[4 * i + 2] << 4));
+        r[5 * i + 3] = (uint8_t) ((a->coeffs[4 * i + 2] >> 4) | (a->coeffs[4 * i + 3] << 6));
+        r[5 * i + 4] = (uint8_t) (a->coeffs[4 * i + 3] >> 2);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack
+*
+* Description: Unpack polynomial t1 with 10-bit coefficients.
+*              Output coefficients are standard representatives.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const uint8_t *a) {
+    unsigned int i;
+    DBENCH_START();
+    /* Five bytes are expanded into four coefficients; the mask 0x3FF isolates one 10-bit field. */
+    for (i = 0; i < N / 4; ++i) {
+        r->coeffs[4 * i + 0] = ((a[5 * i + 0] >> 0) | ((uint32_t)a[5 * i + 1] << 8)) & 0x3FF;
+        r->coeffs[4 * i + 1] = ((a[5 * i + 1] >> 2) | ((uint32_t)a[5 * i + 2] << 6)) & 0x3FF;
+        r->coeffs[4 * i + 2] = ((a[5 * i + 2] >> 4) | ((uint32_t)a[5 * i + 3] << 4)) & 0x3FF;
+        r->coeffs[4 * i + 3] = ((a[5 * i + 3] >> 6) | ((uint32_t)a[5 * i + 4] << 2)) & 0x3FF;
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack
+*
+* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT0_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(uint8_t *r, const poly *a) {
+    unsigned int i;
+    uint32_t t[8];
+    DBENCH_START();
+    /* Eight coefficients per iteration become 13 bytes, i.e. 13 bits per stored value. */
+    for (i = 0; i < N / 8; ++i) {
+        t[0] = (1 << (D - 1)) - a->coeffs[8 * i + 0];
+        t[1] = (1 << (D - 1)) - a->coeffs[8 * i + 1];
+        t[2] = (1 << (D - 1)) - a->coeffs[8 * i + 2];
+        t[3] = (1 << (D - 1)) - a->coeffs[8 * i + 3];
+        t[4] = (1 << (D - 1)) - a->coeffs[8 * i + 4];
+        t[5] = (1 << (D - 1)) - a->coeffs[8 * i + 5];
+        t[6] = (1 << (D - 1)) - a->coeffs[8 * i + 6];
+        t[7] = (1 << (D - 1)) - a->coeffs[8 * i + 7];
+        /* t[k] = 2^(D-1) - coefficient is what gets stored; t[0] lands in the lowest bits of the first byte. */
+        r[13 * i + 0]  =  (uint8_t) t[0];
+        r[13 * i + 1]  =  (uint8_t) (t[0] >>  8);
+        r[13 * i + 1] |=  (uint8_t) (t[1] <<  5);
+        r[13 * i + 2]  =  (uint8_t) (t[1] >>  3);
+        r[13 * i + 3]  =  (uint8_t) (t[1] >> 11);
+        r[13 * i + 3] |=  (uint8_t) (t[2] <<  2);
+        r[13 * i + 4]  =  (uint8_t) (t[2] >>  6);
+        r[13 * i + 4] |=  (uint8_t) (t[3] <<  7);
+        r[13 * i + 5]  =  (uint8_t) (t[3] >>  1);
+        r[13 * i + 6]  =  (uint8_t) (t[3] >>  9);
+        r[13 * i + 6] |=  (uint8_t) (t[4] <<  4);
+        r[13 * i + 7]  =  (uint8_t) (t[4] >>  4);
+        r[13 * i + 8]  =  (uint8_t) (t[4] >> 12);
+        r[13 * i + 8] |=  (uint8_t) (t[5] <<  1);
+        r[13 * i + 9]  =  (uint8_t) (t[5] >>  7);
+        r[13 * i + 9] |=  (uint8_t) (t[6] <<  6);
+        r[13 * i + 10]  =  (uint8_t) (t[6] >>  2);
+        r[13 * i + 11]  =  (uint8_t) (t[6] >> 10);
+        r[13 * i + 11] |=  (uint8_t) (t[7] <<  3);
+        r[13 * i + 12]  =  (uint8_t) (t[7] >>  5);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack
+*
+* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const uint8_t *a) {
+    unsigned int i;
+    DBENCH_START();
+    /* Each iteration reassembles eight fields from 13 bytes; the mask 0x1FFF isolates one 13-bit field. */
+    for (i = 0; i < N / 8; ++i) {
+        r->coeffs[8 * i + 0]  = a[13 * i + 0];
+        r->coeffs[8 * i + 0] |= (uint32_t)a[13 * i + 1] << 8;
+        r->coeffs[8 * i + 0] &= 0x1FFF;
+
+        r->coeffs[8 * i + 1]  = a[13 * i + 1] >> 5;
+        r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 2] << 3;
+        r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 3] << 11;
+        r->coeffs[8 * i + 1] &= 0x1FFF;
+
+        r->coeffs[8 * i + 2]  = a[13 * i + 3] >> 2;
+        r->coeffs[8 * i + 2] |= (uint32_t)a[13 * i + 4] << 6;
+        r->coeffs[8 * i + 2] &= 0x1FFF;
+
+        r->coeffs[8 * i + 3]  = a[13 * i + 4] >> 7;
+        r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 5] << 1;
+        r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 6] << 9;
+        r->coeffs[8 * i + 3] &= 0x1FFF;
+
+        r->coeffs[8 * i + 4]  = a[13 * i + 6] >> 4;
+        r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 7] << 4;
+        r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 8] << 12;
+        r->coeffs[8 * i + 4] &= 0x1FFF;
+
+        r->coeffs[8 * i + 5]  = a[13 * i + 8] >> 1;
+        r->coeffs[8 * i + 5] |= (uint32_t)a[13 * i + 9] << 7;
+        r->coeffs[8 * i + 5] &= 0x1FFF;
+
+        r->coeffs[8 * i + 6]  = a[13 * i + 9] >> 6;
+        r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 10] << 2;
+        r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 11] << 10;
+        r->coeffs[8 * i + 6] &= 0x1FFF;
+
+        r->coeffs[8 * i + 7]  = a[13 * i + 11] >> 3;
+        r->coeffs[8 * i + 7] |= (uint32_t)a[13 * i + 12] << 5;
+        r->coeffs[8 * i + 7] &= 0x1FFF;
+        /* Map each stored value x back to the coefficient 2^(D-1) - x. */
+        r->coeffs[8 * i + 0] = (1 << (D - 1)) - r->coeffs[8 * i + 0];
+        r->coeffs[8 * i + 1] = (1 << (D - 1)) - r->coeffs[8 * i + 1];
+        r->coeffs[8 * i + 2] = (1 << (D - 1)) - r->coeffs[8 * i + 2];
+        r->coeffs[8 * i + 3] = (1 << (D - 1)) - r->coeffs[8 * i + 3];
+        r->coeffs[8 * i + 4] = (1 << (D - 1)) - r->coeffs[8 * i + 4];
+        r->coeffs[8 * i + 5] = (1 << (D - 1)) - r->coeffs[8 * i + 5];
+        r->coeffs[8 * i + 6] = (1 << (D - 1)) - r->coeffs[8 * i + 6];
+        r->coeffs[8 * i + 7] = (1 << (D - 1)) - r->coeffs[8 * i + 7];
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyz_pack
+*
+* Description: Bit-pack polynomial with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYZ_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(uint8_t *r, const poly *a) {
+    unsigned int i;
+    uint32_t t[4];
+    DBENCH_START();
+    /* Four coefficients per iteration become 9 bytes, i.e. 18 bits per stored value. */
+    for (i = 0; i < N / 4; ++i) {
+        t[0] = GAMMA1 - a->coeffs[4 * i + 0];
+        t[1] = GAMMA1 - a->coeffs[4 * i + 1];
+        t[2] = GAMMA1 - a->coeffs[4 * i + 2];
+        t[3] = GAMMA1 - a->coeffs[4 * i + 3];
+        /* t[k] = GAMMA1 - coefficient is what gets stored; t[0] lands in the lowest bits of the first byte. */
+        r[9 * i + 0]  = (uint8_t) t[0];
+        r[9 * i + 1]  = (uint8_t) (t[0] >> 8);
+        r[9 * i + 2]  = (uint8_t) (t[0] >> 16);
+        r[9 * i + 2] |= (uint8_t) (t[1] << 2);
+        r[9 * i + 3]  = (uint8_t) (t[1] >> 6);
+        r[9 * i + 4]  = (uint8_t) (t[1] >> 14);
+        r[9 * i + 4] |= (uint8_t) (t[2] << 4);
+        r[9 * i + 5]  = (uint8_t) (t[2] >> 4);
+        r[9 * i + 6]  = (uint8_t) (t[2] >> 12);
+        r[9 * i + 6] |= (uint8_t) (t[3] << 6);
+        r[9 * i + 7]  = (uint8_t) (t[3] >> 2);
+        r[9 * i + 8]  = (uint8_t) (t[3] >> 10);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack
+*
+* Description: Unpack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const uint8_t *a) {
+    unsigned int i;
+    DBENCH_START();
+    /* Each iteration reassembles four fields from 9 bytes; the mask 0x3FFFF isolates one 18-bit field. */
+    for (i = 0; i < N / 4; ++i) {
+        r->coeffs[4 * i + 0]  = a[9 * i + 0];
+        r->coeffs[4 * i + 0] |= (uint32_t)a[9 * i + 1] << 8;
+        r->coeffs[4 * i + 0] |= (uint32_t)a[9 * i + 2] << 16;
+        r->coeffs[4 * i + 0] &= 0x3FFFF;
+
+        r->coeffs[4 * i + 1]  = a[9 * i + 2] >> 2;
+        r->coeffs[4 * i + 1] |= (uint32_t)a[9 * i + 3] << 6;
+        r->coeffs[4 * i + 1] |= (uint32_t)a[9 * i + 4] << 14;
+        r->coeffs[4 * i + 1] &= 0x3FFFF;
+
+        r->coeffs[4 * i + 2]  = a[9 * i + 4] >> 4;
+        r->coeffs[4 * i + 2] |= (uint32_t)a[9 * i + 5] << 4;
+        r->coeffs[4 * i + 2] |= (uint32_t)a[9 * i + 6] << 12;
+        r->coeffs[4 * i + 2] &= 0x3FFFF;
+
+        r->coeffs[4 * i + 3]  = a[9 * i + 6] >> 6;
+        r->coeffs[4 * i + 3] |= (uint32_t)a[9 * i + 7] << 2;
+        r->coeffs[4 * i + 3] |= (uint32_t)a[9 * i + 8] << 10;
+        r->coeffs[4 * i + 3] &= 0x3FFFF;
+        /* Map each stored value x back to the coefficient GAMMA1 - x. */
+        r->coeffs[4 * i + 0] = GAMMA1 - r->coeffs[4 * i + 0];
+        r->coeffs[4 * i + 1] = GAMMA1 - r->coeffs[4 * i + 1];
+        r->coeffs[4 * i + 2] = GAMMA1 - r->coeffs[4 * i + 2];
+        r->coeffs[4 * i + 3] = GAMMA1 - r->coeffs[4 * i + 3];
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43].
+*              Input coefficients are assumed to be standard representatives.
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r[3 * i + 0] = (uint8_t) a->coeffs[4 * i + 0]; + r[3 * i + 0] |= (uint8_t) (a->coeffs[4 * i + 1] << 6); + r[3 * i + 1] = (uint8_t) (a->coeffs[4 * i + 1] >> 2); + r[3 * i + 1] |= (uint8_t) (a->coeffs[4 * i + 2] << 4); + r[3 * i + 2] = (uint8_t) (a->coeffs[4 * i + 2] >> 4); + r[3 * i + 2] |= (uint8_t) (a->coeffs[4 * i + 3] << 2); + } + + DBENCH_STOP(*tpack); +} diff --git a/crypto_sign/dilithium/dilithium2/clean/poly.h b/crypto_sign/dilithium/dilithium2/clean/poly.h new file mode 100644 index 00000000..cd5660c1 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/poly.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_POLY_H +#define PQCLEAN_DILITHIUM2_CLEAN_POLY_H +#include "params.h" +#include + +typedef struct { + int32_t coeffs[N]; +} poly; + +void PQCLEAN_DILITHIUM2_CLEAN_poly_reduce(poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_poly_caddq(poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_poly_freeze(poly *a); + +void PQCLEAN_DILITHIUM2_CLEAN_poly_add(poly *c, const poly *a, const poly *b); +void PQCLEAN_DILITHIUM2_CLEAN_poly_sub(poly *c, const poly *a, const poly *b); +void PQCLEAN_DILITHIUM2_CLEAN_poly_shiftl(poly *a); + +void PQCLEAN_DILITHIUM2_CLEAN_poly_ntt(poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_poly_invntt_tomont(poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +void PQCLEAN_DILITHIUM2_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a); +unsigned int PQCLEAN_DILITHIUM2_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1); +void 
PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(poly *b, const poly *a, const poly *h); + +int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, int32_t B); +void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); +void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); +void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1(poly *a, + const uint8_t seed[CRHBYTES], + uint16_t nonce); +void PQCLEAN_DILITHIUM2_CLEAN_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(uint8_t *r, const poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(poly *r, const uint8_t *a); + +void PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(uint8_t *r, const poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const uint8_t *a); + +void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(uint8_t *r, const poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const uint8_t *a); + +void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(uint8_t *r, const poly *a); +void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const uint8_t *a); + +void PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(uint8_t *r, const poly *a); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/polyvec.c b/crypto_sign/dilithium/dilithium2/clean/polyvec.c new file mode 100644 index 00000000..d2bec78d --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/polyvec.c @@ -0,0 +1,448 @@ +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include + +/************************************************* +* Name: expand_mat +* +* Description: Implementation of ExpandA. Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* or AES256CTR(rho,j|i). 
+*
+* Arguments:   - polyvecl mat[K]: output matrix
+*              - const uint8_t rho[]: byte array containing seed rho
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) {
+    unsigned int i, j;
+    /* Entry (i, j) is sampled from rho with the 16-bit nonce (i << 8) + j. */
+    for (i = 0; i < K; ++i) {
+        for (j = 0; j < L; ++j) {
+            PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(&mat[i].vec[j], rho, (uint16_t) ((i << 8) + j));
+        }
+    }
+}
+/* Matrix-vector product: entry i of t is the accumulated pointwise product of matrix row i with v. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v);
+    }
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length L **************/
+/**************************************************************/
+/* Samples the L polynomials of v with poly_uniform_eta, using nonce, nonce + 1, ... in order. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(&v->vec[i], seed, nonce++);
+    }
+}
+/* Samples the L polynomials of v with poly_uniform_gamma1; polynomial i uses nonce L * nonce + i. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1(&v->vec[i], seed, (uint16_t) (L * nonce + i));
+    }
+}
+/* Applies poly_reduce to each of the L polynomials of v, in place. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_reduce(polyvecl *v) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_reduce(&v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyvecl_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length L
+*              to standard representatives.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_freeze(polyvecl *v) {
+    unsigned int i;
+    /* Element-wise: poly_freeze is applied to each of the L polynomials in turn. */
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_freeze(&v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyvecl_add
+*
+* Description: Add vectors of polynomials of length L.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyvecl *w: pointer to output vector
+*              - const polyvecl *u: pointer to first summand
+*              - const polyvecl *v: pointer to second summand
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) {
+    unsigned int i;
+    /* Element-wise: entry i of w is poly_add of entry i of u and entry i of v. */
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length L. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(polyvecl *v) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_ntt(&v->vec[i]);
+    }
+}
+/* Applies poly_invntt_tomont to each of the L polynomials of v, in place. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_invntt_tomont(polyvecl *v) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_invntt_tomont(&v->vec[i]);
+    }
+}
+/* Multiplies every polynomial of v pointwise by the single polynomial a; entry i of r gets the product with entry i of v. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_montgomery
+*
+* Description: Pointwise multiply vectors of polynomials of length L, multiply
+*              resulting vector by 2^{-32} and add (accumulate) polynomials
+*              in it. Input/output vectors are in NTT domain representation.
+*
+* Arguments:   - poly *w: output polynomial
+*              - const polyvecl *u: pointer to first input vector
+*              - const polyvecl *v: pointer to second input vector
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_montgomery(poly *w,
+        const polyvecl *u,
+        const polyvecl *v) {
+    unsigned int i;
+    poly t;
+    /* The first product is written straight into w; the remaining L - 1 products go through t and are added to w. */
+    PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]);
+    for (i = 1; i < L; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery(&t, &u->vec[i], &v->vec[i]);
+        PQCLEAN_DILITHIUM2_CLEAN_poly_add(w, w, &t);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length L.
+*              Assumes input polyvecl to be reduced by PQCLEAN_DILITHIUM2_CLEAN_polyvecl_reduce().
+*
+* Arguments:   - const polyvecl *v: pointer to vector
+*              - int32_t bound: norm bound
+*
+* Returns 0 if norm of all polynomials is strictly smaller than bound <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(const polyvecl *v, int32_t bound) {
+    unsigned int i;
+    /* Stops at the first polynomial for which poly_chknorm reports a violation. */
+    for (i = 0; i < L; ++i) {
+        if (PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(&v->vec[i], bound)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length K **************/
+/**************************************************************/
+/* Samples the K polynomials of v with poly_uniform_eta, using nonce, nonce + 1, ... in order. */
+void PQCLEAN_DILITHIUM2_CLEAN_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(&v->vec[i], seed, nonce++);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to representatives in [-6283009,6283007].
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(polyveck *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_reduce(&v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyveck_caddq
+*
+* Description: For all coefficients of polynomials in vector of length K
+*              add Q if coefficient is negative.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyveck_caddq(polyveck *v) {
+    unsigned int i;
+    /* Element-wise: poly_caddq is applied to each of the K polynomials in turn. */
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_caddq(&v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyveck_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to standard representatives.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyveck_freeze(polyveck *v) {
+    unsigned int i;
+    /* Element-wise: poly_freeze is applied to each of the K polynomials in turn. */
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_freeze(&v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyveck_add
+*
+* Description: Add vectors of polynomials of length K.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first summand
+*              - const polyveck *v: pointer to second summand
+**************************************************/
+void PQCLEAN_DILITHIUM2_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) {
+    unsigned int i;
+    /* Element-wise: entry i of w is poly_add of entry i of u and entry i of v. */
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM2_CLEAN_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub
+*
+* Description: Subtract vectors of polynomials of length K.
+*              No modular reduction is performed.
+* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_shiftl +* +* Description: Multiply vector of polynomials of Length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_shiftl(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_shiftl(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_ntt(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_invntt_tomont(&v->vec[i]); + } +} + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); + } +} + + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(). +* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t B: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for (i = 0; i < K; ++i) { + if (PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(&v->vec[i], bound)) { + return 1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives.
+* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such that a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except if a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint +* +* Description: Compute hint vector. +* +* Arguments: - polyveck *h: pointer to output vector +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits.
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1) { + unsigned int i, s = 0; + + for (i = 0; i < K; ++i) { + s += PQCLEAN_DILITHIUM2_CLEAN_poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); + } + + return s; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); + } +} + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(&r[i * POLYW1_PACKEDBYTES], &w1->vec[i]); + } +} diff --git a/crypto_sign/dilithium/dilithium2/clean/polyvec.h b/crypto_sign/dilithium/dilithium2/clean/polyvec.h new file mode 100644 index 00000000..6d0eb473 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/polyvec.h @@ -0,0 +1,68 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_POLYVEC_H +#define PQCLEAN_DILITHIUM2_CLEAN_POLYVEC_H +#include "params.h" +#include "poly.h" +#include + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +void 
PQCLEAN_DILITHIUM2_CLEAN_polyvecl_reduce(polyvecl *v); + +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_freeze(polyvecl *v); + +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(polyvecl *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_invntt_tomont(polyvecl *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + + +int PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(const polyvecl *v, int32_t B); + + + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_caddq(polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_freeze(polyveck *v); + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_shiftl(polyveck *v); + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt(polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +int PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm(const polyveck *v, int32_t B); + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +unsigned int PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1); +void 
PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); + +void PQCLEAN_DILITHIUM2_CLEAN_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1); + +void PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +void PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/reduce.c b/crypto_sign/dilithium/dilithium2/clean/reduce.c new file mode 100644 index 00000000..4d3946ff --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/reduce.c @@ -0,0 +1,69 @@ +#include "params.h" +#include "reduce.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce +* +* Description: For finite field element a with -2^{31}Q <= a <= Q*2^31, +* compute r \equiv a*2^{-32} (mod Q) such that -Q < r < Q. +* +* Arguments: - int64_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce(int64_t a) { + int32_t t; + + t = (int32_t)((uint64_t)a * (uint64_t)QINV); + t = (a - (int64_t)t * Q) >> 32; + return t; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_reduce32 +* +* Description: For finite field element a with a <= 2^{31} - 2^{22} - 1, +* compute r \equiv a (mod Q) such that -6283009 <= r <= 6283007. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_reduce32(int32_t a) { + int32_t t; + + t = (a + (1 << 22)) >> 23; + t = a - t * Q; + return t; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_caddq +* +* Description: Add Q if input coefficient is negative. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_caddq(int32_t a) { + a += (a >> 31) & Q; + return a; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_freeze +* +* Description: For finite field element a, compute standard +* representative r = a mod^+ Q. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_freeze(int32_t a) { + a = PQCLEAN_DILITHIUM2_CLEAN_reduce32(a); + a = PQCLEAN_DILITHIUM2_CLEAN_caddq(a); + return a; +} diff --git a/crypto_sign/dilithium/dilithium2/clean/reduce.h b/crypto_sign/dilithium/dilithium2/clean/reduce.h new file mode 100644 index 00000000..41cd3a31 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/reduce.h @@ -0,0 +1,17 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_REDUCE_H +#define PQCLEAN_DILITHIUM2_CLEAN_REDUCE_H +#include "params.h" +#include + +#define MONT (-4186625) // 2^32 % Q +#define QINV 58728449 // q^(-1) mod 2^32 + +int32_t PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce(int64_t a); + +int32_t PQCLEAN_DILITHIUM2_CLEAN_reduce32(int32_t a); + +int32_t PQCLEAN_DILITHIUM2_CLEAN_caddq(int32_t a); + +int32_t PQCLEAN_DILITHIUM2_CLEAN_freeze(int32_t a); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/rounding.c b/crypto_sign/dilithium/dilithium2/clean/rounding.c new file mode 100644 index 00000000..9b49fb2f --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/rounding.c @@ -0,0 +1,98 @@ +#include "params.h" +#include "rounding.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_power2round +* +* Description: For finite field element a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be standard representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + (1 << (D - 1)) - 1) >> D; + *a0 = a - (a1 << D); + return a1; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_decompose +* +* Description: For finite field element a, compute high and low bits a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. Assumes a to be standard +* representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. +**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + 127) >> 7; + a1 = (a1 * 11275 + (1 << 23)) >> 24; + a1 ^= ((43 - a1) >> 31) & a1; + + *a0 = a - a1 * 2 * GAMMA2; + *a0 -= (((Q - 1) / 2 - *a0) >> 31) & Q; + return a1; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_make_hint +* +* Description: Compute hint bit indicating whether the low bits of the +* input element overflow into the high bits. +* +* Arguments: - int32_t a0: low bits of input element +* - int32_t a1: high bits of input element +* +* Returns 1 if overflow. +**************************************************/ +unsigned int PQCLEAN_DILITHIUM2_CLEAN_make_hint(int32_t a0, int32_t a1) { + if (a0 > GAMMA2 || a0 < -GAMMA2 || (a0 == -GAMMA2 && a1 != 0)) { + return 1; + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_use_hint +* +* Description: Correct high bits according to hint. +* +* Arguments: - int32_t a: input element +* - unsigned int hint: hint bit +* +* Returns corrected high bits. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM2_CLEAN_use_hint(int32_t a, unsigned int hint) { + int32_t a0, a1; + + a1 = PQCLEAN_DILITHIUM2_CLEAN_decompose(&a0, a); + if (hint == 0) { + return a1; + } + + if (a0 > 0) { + if (a1 == 43) { + return 0; + } + return a1 + 1; + } + if (a1 == 0) { + return 43; + } + return a1 - 1; +} diff --git a/crypto_sign/dilithium/dilithium2/clean/rounding.h b/crypto_sign/dilithium/dilithium2/clean/rounding.h new file mode 100644 index 00000000..8542a00e --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/rounding.h @@ -0,0 +1,14 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_ROUNDING_H +#define PQCLEAN_DILITHIUM2_CLEAN_ROUNDING_H +#include "params.h" +#include + +int32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(int32_t *a0, int32_t a); + +int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a); + +unsigned int PQCLEAN_DILITHIUM2_CLEAN_make_hint(int32_t a0, int32_t a1); + +int32_t PQCLEAN_DILITHIUM2_CLEAN_use_hint(int32_t a, unsigned int hint); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/sign.c b/crypto_sign/dilithium/dilithium2/clean/sign.c new file mode 100644 index 00000000..0ea6d242 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/sign.c @@ -0,0 +1,343 @@ +#include "fips202.h" +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "sign.h" +#include "symmetric.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + uint8_t seedbuf[3 * SEEDBYTES]; + uint8_t tr[CRHBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl mat[K]; + polyvecl s1, s1hat; + polyveck s2, t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 3 * SEEDBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = seedbuf + SEEDBYTES; + key = seedbuf + 2 * SEEDBYTES; + + /* Expand matrix */ + PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_expand(mat, rho); + + /* Sample short vectors s1 and s2 */ + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_uniform_eta(&s1, rhoprime, 0); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_uniform_eta(&s2, rhoprime, L); + + /* Matrix-vector multiplication */ + s1hat = s1; + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(&s1hat); + PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_pointwise_montgomery(&t1, mat, &s1hat); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(&t1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(&t1); + + /* Add error vector s2 */ + PQCLEAN_DILITHIUM2_CLEAN_polyveck_add(&t1, &t1, &s2); + + /* Extract t1 and write public key */ + PQCLEAN_DILITHIUM2_CLEAN_polyveck_caddq(&t1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_power2round(&t1, &t0, &t1); + PQCLEAN_DILITHIUM2_CLEAN_pack_pk(pk, rho, &t1); + + /* Compute CRH(rho, t1) and write secret key */ + crh(tr, pk, PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES); + PQCLEAN_DILITHIUM2_CLEAN_pack_sk(sk, rho, tr, key, &t0, &s1, &s2); + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature +* +* Description: Computes signature. 
+* +* Arguments: - uint8_t *sig: pointer to output signature (of length PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature(uint8_t *sig, + size_t *siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) { + unsigned int n; + uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime; + uint16_t nonce = 0; + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + CRHBYTES; + mu = key + SEEDBYTES; + rhoprime = mu + CRHBYTES; + PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + crh(rhoprime, key, SEEDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_expand(mat, rho); + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(&s1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt(&s2); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt(&t0); + +rej: + /* Sample intermediate vector y */ + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_uniform_gamma1(&y, rhoprime, nonce++); + + /* Matrix-vector multiplication */ + z = y; + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(&z); + PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(&w1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + PQCLEAN_DILITHIUM2_CLEAN_polyveck_caddq(&w1); + 
PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose(&w1, &w0, &w1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_pack_w1(sig, &w1); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + PQCLEAN_DILITHIUM2_CLEAN_poly_challenge(&cp, sig); + PQCLEAN_DILITHIUM2_CLEAN_poly_ntt(&cp); + + /* Compute z, reject if it reveals secret */ + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_poly_montgomery(&z, &cp, &s1); + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_invntt_tomont(&z); + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_add(&z, &z, &y); + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_reduce(&z); + if (PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(&z, GAMMA1 - BETA)) { + goto rej; + } + + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + PQCLEAN_DILITHIUM2_CLEAN_polyveck_pointwise_poly_montgomery(&h, &cp, &s2); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(&h); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub(&w0, &w0, &h); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(&w0); + if (PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm(&w0, GAMMA2 - BETA)) { + goto rej; + } + + /* Compute hints for w1 */ + PQCLEAN_DILITHIUM2_CLEAN_polyveck_pointwise_poly_montgomery(&h, &cp, &t0); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(&h); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(&h); + if (PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm(&h, GAMMA2)) { + goto rej; + } + + PQCLEAN_DILITHIUM2_CLEAN_polyveck_add(&w0, &w0, &h); + n = PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint(&h, &w0, &w1); + if (n > OMEGA) { + goto rej; + } + + /* Write signature */ + PQCLEAN_DILITHIUM2_CLEAN_pack_sig(sig, sig, &z, &h); + *siglen = PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign +* +* Description: Compute signed 
message. +* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign(uint8_t *sm, + size_t *smlen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) { + size_t i; + + for (i = 0; i < mlen; ++i) { + sm[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + } + PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature(sm, smlen, sm + PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify +* +* Description: Verifies signature. 
+* +* Arguments: - const uint8_t *sig: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify(const uint8_t *sig, + size_t siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *pk) { + unsigned int i; + uint8_t buf[K * POLYW1_PACKEDBYTES]; + uint8_t rho[SEEDBYTES]; + uint8_t mu[CRHBYTES]; + uint8_t c[SEEDBYTES]; + uint8_t c2[SEEDBYTES]; + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; + shake256incctx state; + + if (siglen != PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES) { + return -1; + } + + PQCLEAN_DILITHIUM2_CLEAN_unpack_pk(rho, &t1, pk); + if (PQCLEAN_DILITHIUM2_CLEAN_unpack_sig(c, &z, &h, sig)) { + return -1; + } + if (PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(&z, GAMMA1 - BETA)) { + return -1; + } + + /* Compute CRH(CRH(rho, t1), msg) */ + crh(mu, pk, PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + /* Matrix-vector multiplication; compute Az - c*2^D*t1 */ + PQCLEAN_DILITHIUM2_CLEAN_poly_challenge(&cp, c); + PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_expand(mat, rho); + + PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(&z); + PQCLEAN_DILITHIUM2_CLEAN_polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + + PQCLEAN_DILITHIUM2_CLEAN_poly_ntt(&cp); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_shiftl(&t1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt(&t1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_pointwise_poly_montgomery(&t1, &cp, &t1); + + PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub(&w1, &w1, &t1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(&w1); +
PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_tomont(&w1); + + /* Reconstruct w1 */ + PQCLEAN_DILITHIUM2_CLEAN_polyveck_caddq(&w1); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(&w1, &w1, &h); + PQCLEAN_DILITHIUM2_CLEAN_polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(c2, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + for (i = 0; i < SEEDBYTES; ++i) { + if (c[i] != c2[i]) { + return -1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open +* +* Description: Verify signed message. +* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open(uint8_t *m, + size_t *mlen, + const uint8_t *sm, + size_t smlen, + const uint8_t *pk) { + size_t i; + + if (smlen < PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES) { + goto badsig; + } + + *mlen = smlen - PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES; + if (PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify(sm, PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES, sm + PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES, *mlen, pk)) { + goto badsig; + } else { + /* All good, copy msg, return 0 */ + for (i = 0; i < *mlen; ++i) { + m[i] = sm[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES + i]; + } + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = (size_t) -1; + for (i = 0; i < smlen; ++i) { + m[i] = 0; + } + + return
-1; +} diff --git a/crypto_sign/dilithium/dilithium2/clean/sign.h b/crypto_sign/dilithium/dilithium2/clean/sign.h new file mode 100644 index 00000000..3b151d58 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/sign.h @@ -0,0 +1,29 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_SIGN_H +#define PQCLEAN_DILITHIUM2_CLEAN_SIGN_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include +#include + +void PQCLEAN_DILITHIUM2_CLEAN_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium2/clean/symmetric-shake.c b/crypto_sign/dilithium/dilithium2/clean/symmetric-shake.c new file mode 100644 index 00000000..1decd901 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/symmetric-shake.c @@ -0,0 +1,26 @@ +#include "fips202.h" +#include "params.h" +#include "symmetric.h" +#include + +void PQCLEAN_DILITHIUM2_CLEAN_dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void PQCLEAN_DILITHIUM2_CLEAN_dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) 
nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/crypto_sign/dilithium/dilithium2/clean/symmetric.h b/crypto_sign/dilithium/dilithium2/clean/symmetric.h new file mode 100644 index 00000000..cbbb11d4 --- /dev/null +++ b/crypto_sign/dilithium/dilithium2/clean/symmetric.h @@ -0,0 +1,36 @@ +#ifndef PQCLEAN_DILITHIUM2_CLEAN_SYMMETRIC_H +#define PQCLEAN_DILITHIUM2_CLEAN_SYMMETRIC_H +#include "fips202.h" +#include "params.h" +#include + + + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +void PQCLEAN_DILITHIUM2_CLEAN_dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +void PQCLEAN_DILITHIUM2_CLEAN_dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) +#define stream128_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM2_CLEAN_dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE128_RATE), STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM2_CLEAN_dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE256_RATE), STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + +#endif diff --git a/crypto_sign/dilithium/dilithium3/META.yml b/crypto_sign/dilithium/dilithium3/META.yml new file mode 100644 index 00000000..3b84f249 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/META.yml @@ -0,0 +1,31 @@ +name: Dilithium3 +type: 
signature +claimed-nist-level: 3 +length-public-key: 1952 +length-secret-key: 4016 +length-signature: 3293 +nistkat-sha256: d0d4bb6945e14206d17b52f8a395d5a750ec8a73f2ea06b9f1cd226d225a9bfb +testvectors-sha256: 531b85dbecaeaf135ad9004c8e2d5ce163b8e72d9c3a537e15bd383cf5f38aa4 +principal-submitters: + - Vadim Lyubashevsky +auxiliary-submitters: + - Léo Ducas + - Eike Kiltz + - Tancrède Lepoint + - Peter Schwabe + - Gregor Seiler + - Damien Stehlé +implementations: + - name: clean + version: https://github.com/pq-crystals/dilithium/commit/1e63a1e880401166f105ab44ec67464c9714a315 via https://github.com/jschanck/package-pqclean/tree/b158a891/dilithium + - name: avx2 + version: https://github.com/pq-crystals/dilithium/commit/1e63a1e880401166f105ab44ec67464c9714a315 via https://github.com/jschanck/package-pqclean/tree/b158a891/dilithium + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - aes + - avx2 + - popcnt diff --git a/crypto_sign/dilithium/dilithium3/avx2/LICENSE b/crypto_sign/dilithium/dilithium3/avx2/LICENSE new file mode 100644 index 00000000..08473af7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/LICENSE @@ -0,0 +1,5 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/crypto_sign/dilithium/dilithium3/avx2/align.h b/crypto_sign/dilithium/dilithium3/avx2/align.h new file mode 100644 index 00000000..668db1c0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/align.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_ALIGN_H +#define PQCLEAN_DILITHIUM3_AVX2_ALIGN_H + +#include <immintrin.h> +#include <stdint.h> + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[((N)+31)/32]; \ + } + +#define ALIGNED_INT32(N) \ + union { \ + int32_t coeffs[N]; \ + __m256i vec[((N)+7)/8]; \ + } + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/api.h b/crypto_sign/dilithium/dilithium3/avx2/api.h new file mode 100644 index 00000000..f6cbffa8 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/api.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_API_H +#define PQCLEAN_DILITHIUM3_AVX2_API_H + +#include <stddef.h> +#include <stdint.h> + +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES 1952 +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES 4016 +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES 3293 + +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_ALGNAME "Dilithium3" + + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/cdecl.h b/crypto_sign/dilithium/dilithium3/avx2/cdecl.h new file mode 100644 index 00000000..e1b6605e --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/cdecl.h @@ -0,0 +1,24 @@ +#ifndef 
PQCLEAN_DILITHIUM3_AVX2_CDECL_H +#define PQCLEAN_DILITHIUM3_AVX2_CDECL_H + + + +#define _8XQ 0 +#define _8XQINV 8 +#define _8XDIV_QINV 16 +#define _8XDIV 24 +#define _ZETAS_QINV 32 +#define _ZETAS 328 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found (consts.c). + * + * This define helps us get around this + */ + +#define _cdecl(s) _##s +#define cdecl(s) s + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/consts.c b/crypto_sign/dilithium/dilithium3/avx2/consts.c new file mode 100644 index 00000000..757fa122 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/consts.c @@ -0,0 +1,101 @@ +#include "consts.h" +#include "params.h" +#include <stdint.h> + +#define QINV 58728449 // q^(-1) mod 2^32 +#define MONT (-4186625) // 2^32 mod q +#define DIV 41978 // mont^2/256 +#define DIV_QINV (-8395782) + +const qdata_t PQCLEAN_DILITHIUM3_AVX2_qdata = {{ +//#define _8XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, + +//#define _8XQINV 8 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +//#define _8XDIV_QINV 16 + DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, + +//#define _8XDIV 24 + DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV, + +//#define _ZETAS_QINV 32 + -151046689, 1830765815, -1929875198, -1927777021, 1640767044, 1477910808, 1612161320, 1640734244, + 308362795, 308362795, 308362795, 308362795, -1815525077, -1815525077, -1815525077, -1815525077, + -1374673747, -1374673747, -1374673747, -1374673747, -1091570561, -1091570561, -1091570561, -1091570561, + -1929495947, -1929495947, -1929495947, -1929495947, 515185417, 515185417, 515185417, 515185417, + -285697463, -285697463, -285697463, -285697463, 625853735, 625853735, 625853735, 625853735, + 1727305304, 1727305304, 2082316400, 2082316400, -1364982364, -1364982364, 858240904, 858240904, + 1806278032, 1806278032, 222489248, 222489248, 
-346752664, -346752664, 684667771, 684667771, + 1654287830, 1654287830, -878576921, -878576921, -1257667337, -1257667337, -748618600, -748618600, + 329347125, 329347125, 1837364258, 1837364258, -1443016191, -1443016191, -1170414139, -1170414139, + -1846138265, -1631226336, -1404529459, 1838055109, 1594295555, -1076973524, -1898723372, -594436433, + -202001019, -475984260, -561427818, 1797021249, -1061813248, 2059733581, -1661512036, -1104976547, + -1750224323, -901666090, 418987550, 1831915353, -1925356481, 992097815, 879957084, 2024403852, + 1484874664, -1636082790, -285388938, -1983539117, -1495136972, -950076368, -1714807468, -952438995, + -1574918427, 1350681039, -2143979939, 1599739335, -1285853323, -993005454, -1440787840, 568627424, + -783134478, -588790216, 289871779, -1262003603, 2135294594, -1018755525, -889861155, 1665705315, + 1321868265, 1225434135, -1784632064, 666258756, 675310538, -1555941048, -1999506068, -1499481951, + -695180180, -1375177022, 1777179795, 334803717, -178766299, -518252220, 1957047970, 1146323031, + -654783359, -1974159335, 1651689966, 140455867, -1039411342, 1955560694, 1529189038, -2131021878, + -247357819, 1518161567, -86965173, 1708872713, 1787797779, 1638590967, -120646188, -1669960606, + -916321552, 1155548552, 2143745726, 1210558298, -1261461890, -318346816, 628664287, -1729304568, + 1422575624, 1424130038, -1185330464, 235321234, 168022240, 1206536194, 985155484, -894060583, + -898413, -1363460238, -605900043, 2027833504, 14253662, 1014493059, 863641633, 1819892093, + 2124962073, -1223601433, -1920467227, -1637785316, -1536588520, 694382729, 235104446, -1045062172, + 831969619, -300448763, 756955444, -260312805, 1554794072, 1339088280, -2040058690, -853476187, + -2047270596, -1723816713, -1591599803, -440824168, 1119856484, 1544891539, 155290192, -973777462, + 991903578, 912367099, -44694137, 1176904444, -421552614, -818371958, 1747917558, -325927722, + 908452108, 1851023419, -1176751719, -1354528380, -72690498, -314284737, 
985022747, 963438279, + -1078959975, 604552167, -1021949428, 608791570, 173440395, -2126092136, -1316619236, -1039370342, + 6087993, -110126092, 565464272, -1758099917, -1600929361, 879867909, -1809756372, 400711272, + 1363007700, 30313375, -326425360, 1683520342, -517299994, 2027935492, -1372618620, 128353682, + -1123881663, 137583815, -635454918, -642772911, 45766801, 671509323, -2070602178, 419615363, + 1216882040, -270590488, -1276805128, 371462360, -1357098057, -384158533, 827959816, -596344473, + 702390549, -279505433, -260424530, -71875110, -1208667171, -1499603926, 2036925262, -540420426, + 746144248, -1420958686, 2032221021, 1904936414, 1257750362, 1926727420, 1931587462, 1258381762, + 885133339, 1629985060, 1967222129, 6363718, -1287922800, 1136965286, 1779436847, 1116720494, + 1042326957, 1405999311, 713994583, 940195359, -1542497137, 2061661095, -883155599, 1726753853, + -1547952704, 394851342, 283780712, 776003547, 1123958025, 201262505, 1934038751, 374860238, + +//#define _ZETAS 328 + -3975713, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 1826347, 1826347, 1826347, 2353451, 2353451, 2353451, 2353451, + -359251, -359251, -359251, -359251, -2091905, -2091905, -2091905, -2091905, + 3119733, 3119733, 3119733, 3119733, -2884855, -2884855, -2884855, -2884855, + 3111497, 3111497, 3111497, 3111497, 2680103, 2680103, 2680103, 2680103, + 2725464, 2725464, 1024112, 1024112, -1079900, -1079900, 3585928, 3585928, + -549488, -549488, -1119584, -1119584, 2619752, 2619752, -2108549, -2108549, + -2118186, -2118186, -3859737, -3859737, -1399561, -1399561, -3277672, -3277672, + 1757237, 1757237, -19422, -19422, 4010497, 4010497, 280005, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + 
-3930395, -3677745, -1452451, 2176455, -1257611, -4083598, -3190144, -3632928, + 3412210, 2147896, -2967645, -411027, -671102, -22981, -381987, 1852771, + -3343383, 508951, 44288, 904516, -3724342, 1653064, 2389356, 759969, + 189548, 3159746, -2409325, 1315589, 1285669, -812732, -3019102, -3628969, + -1528703, -3041255, 3475950, -1585221, 1939314, -1000202, -3157330, 126922, + -983419, 2715295, -3693493, -2477047, -1228525, -1308169, 1349076, -1430430, + 264944, 3097992, -1100098, 3958618, -8578, -3249728, -210977, -1316856, + -3553272, -1851402, -177440, 1341330, -1584928, -1439742, -3881060, 3839961, + 2091667, -3342478, 266997, -3520352, 900702, 495491, -655327, -3556995, + 342297, 3437287, 2842341, 4055324, -3767016, -2994039, -1333058, -451100, + -1279661, 1500165, -542412, -2584293, -2013608, 1957272, -3183426, 810149, + -3038916, 2213111, -426683, -1667432, -2939036, 183443, -554416, 3937738, + 3407706, 2244091, 2434439, -3759364, 1859098, -1613174, -3122442, -525098, + 286988, -3342277, 2691481, 1247620, 1250494, 1869119, 1237275, 1312455, + 1917081, 777191, -2831860, -3724270, 2432395, 3369112, 162844, 1652634, + 3523897, -975884, 1723600, -1104333, -2235985, -976891, 3919660, 1400424, + 2316500, -2446433, -1235728, -1197226, 909542, -43260, 2031748, -768622, + -2437823, 1735879, -2590150, 2486353, 2635921, 1903435, -3318210, 3306115, + -2546312, 2235880, -1671176, 594136, 2454455, 185531, 1616392, -3694233, + 3866901, 1717735, -1803090, -260646, -420899, 1612842, -48306, -846154, + 3817976, -3562462, 3513181, -3193378, 819034, -522500, 3207046, -3595838, + 4108315, 203044, 1265009, 1595974, -3548272, -1050970, -1430225, -1962642, + -1374803, 3406031, -1846953, -3776993, -164721, -1207385, 3014001, -1799107, + 269760, 472078, 1910376, -3833893, -2286327, -3545687, -1362209, 1976782, + } +}; diff --git a/crypto_sign/dilithium/dilithium3/avx2/consts.h b/crypto_sign/dilithium/dilithium3/avx2/consts.h new file mode 100644 index 00000000..d244c443 --- /dev/null 
+++ b/crypto_sign/dilithium/dilithium3/avx2/consts.h @@ -0,0 +1,10 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_CONSTS_H +#define PQCLEAN_DILITHIUM3_AVX2_CONSTS_H +#include "align.h" +#include "cdecl.h" + + +typedef ALIGNED_INT32(624) qdata_t; +extern const qdata_t PQCLEAN_DILITHIUM3_AVX2_qdata; + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/f1600x4.S b/crypto_sign/dilithium/dilithium3/avx2/f1600x4.S new file mode 100644 index 00000000..1aedd83f --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/f1600x4.S @@ -0,0 +1,909 @@ +/* Taken from Bas Westerbaan's new 4-way SHAKE implementation + * for Sphincs+ (https://github.com/sphincs/sphincsplus/pull/14/), + * but uses vpshufb for byte-granular rotations as in the Keccak Code Package. */ + +#include "cdecl.h" + +.data +.p2align 5 +rho8: +.byte 7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14,7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14 +rho56: +.byte 1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8,1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8 + +.text +.global cdecl(PQCLEAN_DILITHIUM3_AVX2_f1600x4) +.global _cdecl(PQCLEAN_DILITHIUM3_AVX2_f1600x4) +cdecl(PQCLEAN_DILITHIUM3_AVX2_f1600x4): +_cdecl(PQCLEAN_DILITHIUM3_AVX2_f1600x4): +vmovdqa rho8(%rip), %ymm0 +movq $6, %rax +looptop: +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq 
$1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 192(%rdi), %ymm4, %ymm9 +vpxor 384(%rdi), %ymm3, %ymm10 +vpxor 576(%rdi), %ymm2, %ymm11 +vpxor 768(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 0(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 96(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 320(%rdi), %ymm5, %ymm10 +vpxor 512(%rdi), %ymm4, %ymm11 +vpxor 704(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, 
%ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 32(%rdi), %ymm4, %ymm8 +vpxor 224(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 608(%rdi), %ymm1, %ymm11 +vpxor 640(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 128(%rdi), %ymm1, %ymm8 +vpxor 160(%rdi), %ymm5, %ymm9 +vpxor 352(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 736(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 
+vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 64(%rdi), %ymm3, %ymm8 +vpxor 256(%rdi), %ymm2, %ymm9 +vpxor 448(%rdi), %ymm1, %ymm10 +vpxor 480(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 448(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 
480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 512(%rdi), %ymm4, %ymm9 +vpxor 224(%rdi), %ymm3, %ymm10 +vpxor 736(%rdi), %ymm2, %ymm11 +vpxor 448(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 8(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 576(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 640(%rdi), %ymm5, %ymm10 +vpxor 352(%rdi), %ymm4, %ymm11 +vpxor 64(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, 
%ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 192(%rdi), %ymm4, %ymm8 +vpxor 704(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 128(%rdi), %ymm1, %ymm11 +vpxor 480(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 768(%rdi), %ymm1, %ymm8 +vpxor 320(%rdi), %ymm5, %ymm9 +vpxor 32(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 256(%rdi), %ymm2, %ymm12 
+vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 384(%rdi), %ymm3, %ymm8 +vpxor 96(%rdi), %ymm2, %ymm9 +vpxor 608(%rdi), %ymm1, %ymm10 +vpxor 160(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 608(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 
160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 352(%rdi), %ymm4, %ymm9 +vpxor 704(%rdi), %ymm3, %ymm10 +vpxor 256(%rdi), %ymm2, %ymm11 +vpxor 608(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 16(%rsi), %ymm8 +vpxor %ymm8, 
%ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 736(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 480(%rdi), %ymm5, %ymm10 +vpxor 32(%rdi), %ymm4, %ymm11 +vpxor 384(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 512(%rdi), %ymm4, %ymm8 +vpxor 64(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 768(%rdi), %ymm1, %ymm11 +vpxor 160(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, 
%ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 448(%rdi), %ymm1, %ymm8 +vpxor 640(%rdi), %ymm5, %ymm9 +vpxor 192(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 96(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 224(%rdi), %ymm3, %ymm8 +vpxor 576(%rdi), %ymm2, %ymm9 +vpxor 128(%rdi), %ymm1, %ymm10 +vpxor 320(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, 
%ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 128(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 32(%rdi), %ymm4, %ymm9 +vpxor 64(%rdi), %ymm3, %ymm10 +vpxor 96(%rdi), %ymm2, %ymm11 +vpxor 128(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn 
%ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 24(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 256(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 160(%rdi), %ymm5, %ymm10 +vpxor 192(%rdi), %ymm4, %ymm11 +vpxor 224(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 352(%rdi), %ymm4, %ymm8 +vpxor 384(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 448(%rdi), %ymm1, %ymm11 +vpxor 320(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, 
%ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 608(%rdi), %ymm1, %ymm8 +vpxor 480(%rdi), %ymm5, %ymm9 +vpxor 512(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 576(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 704(%rdi), %ymm3, %ymm8 +vpxor 736(%rdi), %ymm2, %ymm9 +vpxor 768(%rdi), %ymm1, %ymm10 +vpxor 640(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, 
%ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 768(%rdi) +addq $32, %rsi +subq $1, %rax +jnz looptop +ret diff --git a/crypto_sign/dilithium/dilithium3/avx2/fips202x4.c b/crypto_sign/dilithium/dilithium3/avx2/fips202x4.c new file mode 100644 index 00000000..bb4a3767 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/fips202x4.c @@ -0,0 +1,219 @@ +#include "fips202.h" +#include "fips202x4.h" +#include +#include +#include +#include + +#define NROUNDS 24 + +/* Keccak round constants */ +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + (uint64_t)0x0000000000000001ULL, + (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, + (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, + (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, + (uint64_t)0x000000008000000aULL, + (uint64_t)0x000000008000808bULL, + (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, + (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, + (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, + (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008008ULL +}; + +static void keccakx4_absorb_once(__m256i s[25], + unsigned int r, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, 
+ const uint8_t *in3, + size_t inlen, + uint8_t p) { + size_t i; + uint64_t pos = 0; + __m256i t, idx; + + for (i = 0; i < 25; ++i) { + s[i] = _mm256_setzero_si256(); + } + + idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0); + while (inlen >= r) { + for (i = 0; i < r / 8; ++i) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + s[i] = _mm256_xor_si256(s[i], t); + pos += 8; + } + inlen -= r; + + PQCLEAN_DILITHIUM3_AVX2_f1600x4(s, KeccakF_RoundConstants); + } + + for (i = 0; i < inlen / 8; ++i) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + s[i] = _mm256_xor_si256(s[i], t); + pos += 8; + } + inlen -= 8 * i; + + if (inlen) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + idx = _mm256_set1_epi64x((long long)((1ULL << (8 * inlen)) - 1)); + t = _mm256_and_si256(t, idx); + s[i] = _mm256_xor_si256(s[i], t); + } + + t = _mm256_set1_epi64x((uint64_t)p << 8 * inlen); + s[i] = _mm256_xor_si256(s[i], t); + t = _mm256_set1_epi64x((long long)(1ULL << 63)); + s[r / 8 - 1] = _mm256_xor_si256(s[r / 8 - 1], t); +} + +static void keccakx4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + unsigned int r, + __m256i s[25]) { + unsigned int i; + __m128d t; + + while (nblocks > 0) { + PQCLEAN_DILITHIUM3_AVX2_f1600x4(s, KeccakF_RoundConstants); + for (i = 0; i < r / 8; ++i) { + t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i])); + _mm_storel_pd((double *)&out0[8 * i], t); + _mm_storeh_pd((double *)&out1[8 * i], t); + t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1)); + _mm_storel_pd((double *)&out2[8 * i], t); + _mm_storeh_pd((double *)&out3[8 * i], t); + } + + out0 += r; + out1 += r; + out2 += r; + out3 += r; + --nblocks; + } +} + +void PQCLEAN_DILITHIUM3_AVX2_shake128x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + keccakx4_absorb_once(state->s, SHAKE128_RATE, in0, 
in1, in2, in3, inlen, 0x1F); +} + +void PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) { + keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, state->s); +} + +void PQCLEAN_DILITHIUM3_AVX2_shake256x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + keccakx4_absorb_once(state->s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F); +} + +void PQCLEAN_DILITHIUM3_AVX2_shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) { + keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, state->s); +} + +void PQCLEAN_DILITHIUM3_AVX2_shake128x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + unsigned int i; + size_t nblocks = outlen / SHAKE128_RATE; + uint8_t t[4][SHAKE128_RATE]; + keccakx4_state state; + + PQCLEAN_DILITHIUM3_AVX2_shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen); + PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); + + out0 += nblocks * SHAKE128_RATE; + out1 += nblocks * SHAKE128_RATE; + out2 += nblocks * SHAKE128_RATE; + out3 += nblocks * SHAKE128_RATE; + outlen -= nblocks * SHAKE128_RATE; + + if (outlen) { + PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); + for (i = 0; i < outlen; ++i) { + out0[i] = t[0][i]; + out1[i] = t[1][i]; + out2[i] = t[2][i]; + out3[i] = t[3][i]; + } + } +} + +void PQCLEAN_DILITHIUM3_AVX2_shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + unsigned int 
i; + size_t nblocks = outlen / SHAKE256_RATE; + uint8_t t[4][SHAKE256_RATE]; + keccakx4_state state; + + PQCLEAN_DILITHIUM3_AVX2_shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen); + PQCLEAN_DILITHIUM3_AVX2_shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); + + out0 += nblocks * SHAKE256_RATE; + out1 += nblocks * SHAKE256_RATE; + out2 += nblocks * SHAKE256_RATE; + out3 += nblocks * SHAKE256_RATE; + outlen -= nblocks * SHAKE256_RATE; + + if (outlen) { + PQCLEAN_DILITHIUM3_AVX2_shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); + for (i = 0; i < outlen; ++i) { + out0[i] = t[0][i]; + out1[i] = t[1][i]; + out2[i] = t[2][i]; + out3[i] = t[3][i]; + } + } +} diff --git a/crypto_sign/dilithium/dilithium3/avx2/fips202x4.h b/crypto_sign/dilithium/dilithium3/avx2/fips202x4.h new file mode 100644 index 00000000..0a480d63 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/fips202x4.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_FIPS202X4_H +#define PQCLEAN_DILITHIUM3_AVX2_FIPS202X4_H + +#include +#include +#include + +typedef struct { + __m256i s[25]; +} keccakx4_state; + +void PQCLEAN_DILITHIUM3_AVX2_f1600x4(__m256i *s, const uint64_t *rc); + +void PQCLEAN_DILITHIUM3_AVX2_shake128x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +void PQCLEAN_DILITHIUM3_AVX2_shake256x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM3_AVX2_shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +void PQCLEAN_DILITHIUM3_AVX2_shake128x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t 
*out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM3_AVX2_shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/invntt.S b/crypto_sign/dilithium/dilithium3/avx2/invntt.S new file mode 100644 index 00000000..41c831b0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/invntt.S @@ -0,0 +1,240 @@ +#include "cdecl.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpsubd %ymm\l,%ymm\h,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vpmuldq %ymm\zl0,%ymm12,%ymm13 +vmovshdup %ymm12,%ymm\h +vpmuldq %ymm\zl1,%ymm\h,%ymm14 + +vpmuldq %ymm\zh0,%ymm12,%ymm12 +vpmuldq %ymm\zh1,%ymm\h,%ymm\h + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vpsubd %ymm13,%ymm12,%ymm12 +vpsubd %ymm14,%ymm\h,%ymm\h + +vmovshdup %ymm12,%ymm12 +vpblendd $0xAA,%ymm\h,%ymm12,%ymm\h +.endm + +.macro levels0t5 off +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +/* level 0 */ +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,5,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 6,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-72)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,9,1,3,2,15 + +vpermq 
$0x1B,(_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-104)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 10,11,1,3,2,15 + +/* level 1 */ +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,6,1,3,2,15 +butterfly 5,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,10,1,3,2,15 +butterfly 9,11,1,3,2,15 + +/* level 2 */ +vpermq $0x1B,(_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+104-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,8,1,3,2,15 +butterfly 5,9,1,3,2,15 +butterfly 6,10,1,3,2,15 +butterfly 7,11,1,3,2,15 + +/* level 3 */ +shuffle2 4,5,3,5 +shuffle2 6,7,4,7 +shuffle2 8,9,6,9 +shuffle2 10,11,8,11 + +vpermq $0x1B,(_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+72-8*\off-8)*4(%rsi),%ymm2 +butterfly 3,5 +butterfly 4,7 +butterfly 6,9 +butterfly 8,11 + +/* level 4 */ +shuffle4 3,4,10,4 +shuffle4 6,8,3,8 +shuffle4 5,7,6,7 +shuffle4 9,11,5,11 + +vpermq $0x1B,(_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+40-8*\off-8)*4(%rsi),%ymm2 +butterfly 10,4 +butterfly 3,8 +butterfly 6,7 +butterfly 5,11 + +/* level 5 */ +shuffle8 10,3,9,3 +shuffle8 6,5,10,5 +shuffle8 4,8,6,8 +shuffle8 7,11,4,11 + +vpbroadcastd (_ZETAS_QINV+7-\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+7-\off)*4(%rsi),%ymm2 +butterfly 9,3 +butterfly 10,5 +butterfly 6,8 +butterfly 4,11 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm10,256*\off+ 32(%rdi) +vmovdqa %ymm6,256*\off+ 64(%rdi) +vmovdqa %ymm4,256*\off+ 96(%rdi) +vmovdqa %ymm3,256*\off+128(%rdi) +vmovdqa %ymm5,256*\off+160(%rdi) +vmovdqa %ymm8,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.macro levels6t7 off +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 
128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +/* level 6 */ +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +/* level 7 */ +vpbroadcastd (_ZETAS_QINV+0)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+0)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) + +vmovdqa (_8XDIV_QINV)*4(%rsi),%ymm1 +vmovdqa (_8XDIV)*4(%rsi),%ymm2 +vpmuldq %ymm1,%ymm4,%ymm12 +vpmuldq %ymm1,%ymm5,%ymm13 +vmovshdup %ymm4,%ymm8 +vmovshdup %ymm5,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm4,%ymm4 +vpmuldq %ymm2,%ymm5,%ymm5 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm4,%ymm4 +vpsubd %ymm13,%ymm5,%ymm5 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm4,%ymm4 +vmovshdup %ymm5,%ymm5 +vpblendd $0xAA,%ymm8,%ymm4,%ymm4 +vpblendd $0xAA,%ymm9,%ymm5,%ymm5 + +vpmuldq %ymm1,%ymm6,%ymm12 +vpmuldq %ymm1,%ymm7,%ymm13 +vmovshdup %ymm6,%ymm8 +vmovshdup %ymm7,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm6,%ymm6 +vpmuldq %ymm2,%ymm7,%ymm7 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm6,%ymm6 +vpsubd %ymm13,%ymm7,%ymm7 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm6,%ymm6 +vmovshdup %ymm7,%ymm7 +vpblendd 
$0xAA,%ymm8,%ymm6,%ymm6 +vpblendd $0xAA,%ymm9,%ymm7,%ymm7 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +.endm + +.text +.global cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_avx) +.global _cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_avx) +cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_avx): +_cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t5 0 +levels0t5 1 +levels0t5 2 +levels0t5 3 + +levels6t7 0 +levels6t7 1 +levels6t7 2 +levels6t7 3 + +ret diff --git a/crypto_sign/dilithium/dilithium3/avx2/ntt.S b/crypto_sign/dilithium/dilithium3/avx2/ntt.S new file mode 100644 index 00000000..d89f8045 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/ntt.S @@ -0,0 +1,199 @@ +#include "cdecl.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpmuldq %ymm\zl0,%ymm\h,%ymm13 +vmovshdup %ymm\h,%ymm12 +vpmuldq %ymm\zl1,%ymm12,%ymm14 + +vpmuldq %ymm\zh0,%ymm\h,%ymm\h +vpmuldq %ymm\zh1,%ymm12,%ymm12 + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vmovshdup %ymm\h,%ymm\h +vpblendd $0xAA,%ymm12,%ymm\h,%ymm\h + +vpsubd %ymm\h,%ymm\l,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vmovshdup %ymm13,%ymm13 +vpblendd $0xAA,%ymm14,%ymm13,%ymm13 + +vpaddd %ymm13,%ymm12,%ymm\h +vpsubd %ymm13,%ymm\l,%ymm\l +.endm + +.macro levels0t1 off +/* level 0 */ +vpbroadcastd (_ZETAS_QINV+1)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+1)*4(%rsi),%ymm2 + +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +/* level 1 */ +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd 
(_ZETAS+3)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +.endm + +.macro levels2t7 off +/* level 2 */ +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +vpbroadcastd (_ZETAS_QINV+4+\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+4+\off)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +/* level 3 */ +vmovdqa (_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+8+8*\off)*4(%rsi),%ymm2 + +butterfly 3,5 +butterfly 8,10 +butterfly 4,6 +butterfly 9,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +/* level 4 */ +vmovdqa (_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+40+8*\off)*4(%rsi),%ymm2 + +butterfly 7,8 +butterfly 5,6 +butterfly 3,4 +butterfly 10,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_QINV+72+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+72+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 + +butterfly 9,5,1,10,2,15 +butterfly 8,4,1,10,2,15 +butterfly 7,3,1,10,2,15 +butterfly 6,11,1,10,2,15 + +/* level 6 */ +vmovdqa (_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,7,1,10,2,15 +butterfly 8,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 
5,3,1,10,2,15 +butterfly 4,11,1,10,2,15 + +/* level 7 */ +vmovdqa (_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,8,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 7,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+64)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,4,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+96)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 3,11,1,10,2,15 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm8,256*\off+ 32(%rdi) +vmovdqa %ymm7,256*\off+ 64(%rdi) +vmovdqa %ymm6,256*\off+ 96(%rdi) +vmovdqa %ymm5,256*\off+128(%rdi) +vmovdqa %ymm4,256*\off+160(%rdi) +vmovdqa %ymm3,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.text +.global cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_avx) +.global _cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_avx) +cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_avx): +_cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t1 0 +levels0t1 1 +levels0t1 2 +levels0t1 3 + +levels2t7 0 +levels2t7 1 +levels2t7 2 +levels2t7 3 + +ret + diff --git a/crypto_sign/dilithium/dilithium3/avx2/ntt.h b/crypto_sign/dilithium/dilithium3/avx2/ntt.h new file mode 100644 index 00000000..b6c6e372 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/ntt.h @@ -0,0 +1,14 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_NTT_H +#define PQCLEAN_DILITHIUM3_AVX2_NTT_H + +#include + +void PQCLEAN_DILITHIUM3_AVX2_ntt_avx(__m256i *a, const __m256i *PQCLEAN_DILITHIUM3_AVX2_qdata); +void PQCLEAN_DILITHIUM3_AVX2_invntt_avx(__m256i *a, const __m256i *PQCLEAN_DILITHIUM3_AVX2_qdata); + +void PQCLEAN_DILITHIUM3_AVX2_nttunpack_avx(__m256i *a); + +void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(__m256i *c, const 
__m256i *a, const __m256i *b, const __m256i *PQCLEAN_DILITHIUM3_AVX2_qdata); +void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *PQCLEAN_DILITHIUM3_AVX2_qdata); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/packing.c b/crypto_sign/dilithium/dilithium3/avx2/packing.c new file mode 100644 index 00000000..dde16b0f --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/packing.c @@ -0,0 +1,261 @@ +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" + + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + pk[i] = rho[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(pk + i * POLYT1_PACKEDBYTES, &t1->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_unpack_pk +* +* Description: Unpack public key pk = (rho, t1). 
+* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const polyveck *t1: pointer to output vector t1 +* - uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = pk[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(&t1->vec[i], pk + i * POLYT1_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_pack_sk +* +* Description: Bit-pack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = rho[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = key[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + sk[i] = tr[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s1->vec[i]); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s2->vec[i]); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + 
PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(sk + i * POLYT0_PACKEDBYTES, &t0->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const uint8_t tr[]: output byte array for tr +* - const uint8_t key[]: output byte array for key +* - const polyveck *t0: pointer to output vector t0 +* - const polyvecl *s1: pointer to output vector s1 +* - const polyveck *s2: pointer to output vector s2 +* - uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + key[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + tr[i] = sk[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(&s1->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(&s2->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack(&t0->vec[i], sk + i * POLYT0_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_pack_sig +* +* Description: Bit-pack signature sig = (c, z, h). 
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to PQCLEAN_DILITHIUM3_AVX2_challenge hash length SEEDBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES], + const uint8_t c[SEEDBYTES], + const polyvecl *z, + const polyveck *h) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + sig[i] = c[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Encode h */ + for (i = 0; i < OMEGA + K; ++i) { + sig[i] = 0; + } + + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + if (h->vec[i].coeffs[j] != 0) { + sig[k++] = (uint8_t) j; + } + } + + sig[OMEGA + i] = (uint8_t) k; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output PQCLEAN_DILITHIUM3_AVX2_challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0. 
+**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_unpack_sig(uint8_t c[SEEDBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES]) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + c[i] = sig[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + h->vec[i].coeffs[j] = 0; + } + + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { + return 1; + } + + for (j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) { + return 1; + } + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (j = k; j < OMEGA; ++j) { + if (sig[j]) { + return 1; + } + } + + return 0; +} diff --git a/crypto_sign/dilithium/dilithium3/avx2/packing.h b/crypto_sign/dilithium/dilithium3/avx2/packing.h new file mode 100644 index 00000000..9404c00f --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/packing.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_PACKING_H +#define PQCLEAN_DILITHIUM3_AVX2_PACKING_H +#include "params.h" +#include "polyvec.h" +#include + +void PQCLEAN_DILITHIUM3_AVX2_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +void PQCLEAN_DILITHIUM3_AVX2_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +void PQCLEAN_DILITHIUM3_AVX2_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); + +void 
PQCLEAN_DILITHIUM3_AVX2_unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES]); + +void PQCLEAN_DILITHIUM3_AVX2_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES]); + +int PQCLEAN_DILITHIUM3_AVX2_unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES]); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/params.h b/crypto_sign/dilithium/dilithium3/avx2/params.h new file mode 100644 index 00000000..d824116f --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/params.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_PARAMS_H +#define PQCLEAN_DILITHIUM3_AVX2_PARAMS_H + + + +#define SEEDBYTES 32 +#define CRHBYTES 48 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_ALGNAME "Dilithium3" + + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#define POLYZ_PACKEDBYTES 640 + +#define POLYW1_PACKEDBYTES 128 + +#define POLYETA_PACKEDBYTES 128 + +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + CRHBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES (SEEDBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/pointwise.S b/crypto_sign/dilithium/dilithium3/avx2/pointwise.S new file mode 100644 index 00000000..b3e020f4 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/pointwise.S @@ 
-0,0 +1,201 @@ +#include "params.h" +#include "cdecl.h" + +.text +.global cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx) +.global _cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx) +cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx): +_cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop1: +#load +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa 64(%rsi),%ymm6 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vmovdqa 64(%rdx),%ymm14 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm6,%ymm7 +vpsrlq $32,%ymm10,%ymm11 +vpsrlq $32,%ymm12,%ymm13 +vmovshdup %ymm14,%ymm15 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 +vpmuldq %ymm6,%ymm14,%ymm6 +vpmuldq %ymm7,%ymm15,%ymm7 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm0,%ymm6,%ymm14 +vpmuldq %ymm0,%ymm7,%ymm15 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpmuldq %ymm1,%ymm14,%ymm14 +vpmuldq %ymm1,%ymm15,%ymm15 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsubq %ymm14,%ymm6,%ymm6 +vpsubq %ymm15,%ymm7,%ymm7 +vpsrlq $32,%ymm2,%ymm2 +vpsrlq $32,%ymm4,%ymm4 +vmovshdup %ymm6,%ymm6 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 +vpblendd $0xAA,%ymm7,%ymm6,%ymm6 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm6,64(%rdi) + +add $96,%rdi +add $96,%rsi +add $96,%rdx +add $1,%eax +cmp $10,%eax +jb _looptop1 + +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq 
%ymm5,%ymm13,%ymm5 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0x55,%ymm2,%ymm3,%ymm2 +vpblendd $0x55,%ymm4,%ymm5,%ymm4 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +ret + +.macro pointwise off +#load +vmovdqa \off(%rsi),%ymm6 +vmovdqa \off+32(%rsi),%ymm8 +vmovdqa \off(%rdx),%ymm10 +vmovdqa \off+32(%rdx),%ymm12 +vpsrlq $32,%ymm6,%ymm7 +vpsrlq $32,%ymm8,%ymm9 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm6,%ymm10,%ymm6 +vpmuldq %ymm7,%ymm11,%ymm7 +vpmuldq %ymm8,%ymm12,%ymm8 +vpmuldq %ymm9,%ymm13,%ymm9 +.endm + +.macro acc +vpaddq %ymm6,%ymm2,%ymm2 +vpaddq %ymm7,%ymm3,%ymm3 +vpaddq %ymm8,%ymm4,%ymm4 +vpaddq %ymm9,%ymm5,%ymm5 +.endm + +.global cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx) +.global _cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx) +cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx): +_cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop2: +pointwise 0 + +#mov +vmovdqa %ymm6,%ymm2 +vmovdqa %ymm7,%ymm3 +vmovdqa %ymm8,%ymm4 +vmovdqa %ymm9,%ymm5 + +pointwise 1024 +acc + +pointwise 2048 +acc + +pointwise 3072 +acc + +pointwise 4096 +acc + + + +#reduce +vpmuldq %ymm0,%ymm2,%ymm6 +vpmuldq %ymm0,%ymm3,%ymm7 +vpmuldq %ymm0,%ymm4,%ymm8 +vpmuldq %ymm0,%ymm5,%ymm9 +vpmuldq %ymm1,%ymm6,%ymm6 +vpmuldq %ymm1,%ymm7,%ymm7 +vpmuldq %ymm1,%ymm8,%ymm8 +vpmuldq %ymm1,%ymm9,%ymm9 +vpsubq %ymm6,%ymm2,%ymm2 +vpsubq %ymm7,%ymm3,%ymm3 +vpsubq %ymm8,%ymm4,%ymm4 +vpsubq %ymm9,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd 
$0xAA,%ymm5,%ymm4,%ymm4 + +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +add $64,%rsi +add $64,%rdx +add $64,%rdi +add $1,%eax +cmp $16,%eax +jb _looptop2 + +ret diff --git a/crypto_sign/dilithium/dilithium3/avx2/poly.c b/crypto_sign/dilithium/dilithium3/avx2/poly.c new file mode 100644 index 00000000..550ed556 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/poly.c @@ -0,0 +1,998 @@ +#include "align.h" +#include "consts.h" +#include "fips202x4.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "rejsample.h" +#include "rounding.h" +#include "symmetric.h" +#include +#include +#include + +#define DBENCH_START() +#define DBENCH_STOP(t) + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_reduce(poly *a) { + unsigned int i; + __m256i f, g; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM3_AVX2_qdata.vec[_8XQ / 8]); + const __m256i off = _mm256_set1_epi32(1 << 22); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_add_epi32(f, off); + g = _mm256_srai_epi32(g, 23); + g = _mm256_mullo_epi32(g, q); + f = _mm256_sub_epi32(f, g); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: poly_addq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_caddq(poly *a) { + unsigned int i; + __m256i f, g; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM3_AVX2_qdata.vec[_8XQ / 8]); + const __m256i zero = _mm256_setzero_si256(); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_blendv_epi32(zero, q, f); + f = _mm256_add_epi32(f, g); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_freeze +* +* Description: Inplace reduction of all coefficients of polynomial to +* positive standard representatives. Assumes input +* coefficients to be at most 2^31 - 2^22 + 1 in +* absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_freeze(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_poly_reduce(a); + PQCLEAN_DILITHIUM3_AVX2_poly_caddq(a); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f, g; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_add_epi32(f, g); + _mm256_store_si256(&c->vec[i], f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_sub +* +* Description: Subtract polynomials. 
No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtraced from first input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f, g; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_sub_epi32(f, g); + _mm256_store_si256(&c->vec[i], f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(poly *a) { + unsigned int i; + __m256i f; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_slli_epi32(f, D); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by up to +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_ntt(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_ntt_avx(a->vec, PQCLEAN_DILITHIUM3_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. 
+* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_invntt_avx(a->vec, PQCLEAN_DILITHIUM3_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_nttunpack_avx(a->vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(c->vec, a->vec, b->vec, PQCLEAN_DILITHIUM3_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod^+ Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* positive standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_power2round_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod^+ Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except if c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_decompose_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_make_hint +* +* Description: Compute hint array. The coefficients of which are the +* indices of the coefficients of the input polynomial +* whose low bits overflow into the high bits. +* +* Arguments: - uint8_t *h: pointer to output hint array (preallocated of length N) +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of hints, i.e. length of hint array. 
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM3_AVX2_poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1) { + unsigned int r; + DBENCH_START(); + + r = PQCLEAN_DILITHIUM3_AVX2_make_hint_avx(hint, a0->vec, a1->vec); + + DBENCH_STOP(*tround); + return r; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_use_hint(poly *b, const poly *a, const poly *h) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_AVX2_use_hint_avx(b->vec, a->vec, h->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input polynomial to be reduced by PQCLEAN_DILITHIUM3_AVX2_poly_reduce(). +* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. 
+**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int r; + __m256i f, t; + const __m256i bound = _mm256_set1_epi32(B - 1); + DBENCH_START(); + + if (B > (Q - 1) / 8) { + return 1; + } + + t = _mm256_setzero_si256(); + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_abs_epi32(f); + f = _mm256_cmpgt_epi32(f, bound); + t = _mm256_or_si256(t, f); + } + + r = 1 - _mm256_testz_si256(t, t); + DBENCH_STOP(*tsample); + return r; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + a[ctr++] = t; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream of SHAKE256(seed|nonce) or AES256CTR(seed,nonce). 
+* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_preinit(poly *a, stream128_state *state) { + unsigned int ctr; + /* PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx reads up to 8 additional bytes */ + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN + 8) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_NBLOCKS, state); + ctr = PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(a->coeffs, buf.coeffs); + + while (ctr < N) { + /* length of buf is always divisible by 3; hence, no bytes left */ + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + stream128_state state; + stream128_init(&state, seed, nonce); + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_preinit(a, &state); + stream128_release(&state); +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN + 8) buf[4]; + keccakx4_state state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[SEEDBYTES + 0] = nonce0; + buf[0].coeffs[SEEDBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES + 0] = nonce1; + buf[1].coeffs[SEEDBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES + 0] = nonce2; + buf[2].coeffs[SEEDBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES + 0] = nonce3; + buf[3].coeffs[SEEDBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM3_AVX2_shake128x4_absorb_once(&state, 
buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state); + + ctr0 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(a0->coeffs, buf[0].coeffs); + ctr1 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(a1->coeffs, buf[1].coeffs); + ctr2 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(a2->coeffs, buf[2].coeffs); + ctr3 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(a3->coeffs, buf[3].coeffs); + + while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. 
+**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos < buflen) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if (t0 < 9) { + a[ctr++] = 4 - t0; + } + if (t1 < 9 && ctr < len) { + a[ctr++] = 4 - t1; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling using the +* output stream of SHAKE256(seed|nonce) +* or AES256CTR(seed,nonce). +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_preinit(poly *a, stream128_state *state) { + unsigned int ctr; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN * STREAM128_BLOCKBYTES) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_ETA_NBLOCKS, state); + ctr = PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(a->coeffs, buf.coeffs); + + while (ctr < N) { + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + stream128_state state; + stream128_init(&state, seed, nonce); + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_preinit(a, &state); + stream128_release(&state); +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + unsigned int ctr0, ctr1, ctr2, ctr3; + 
ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4]; + + __m256i f; + keccakx4_state state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[SEEDBYTES + 0] = nonce0; + buf[0].coeffs[SEEDBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES + 0] = nonce1; + buf[1].coeffs[SEEDBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES + 0] = nonce2; + buf[2].coeffs[SEEDBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES + 0] = nonce3; + buf[3].coeffs[SEEDBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM3_AVX2_shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state); + + ctr0 = PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(a0->coeffs, buf[0].coeffs); + ctr1 = PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(a1->coeffs, buf[1].coeffs); + ctr2 = PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(a2->coeffs, buf[2].coeffs); + ctr3 = PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(a3->coeffs, buf[3].coeffs); + + while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + PQCLEAN_DILITHIUM3_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_eta(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_eta(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of SHAKE256(seed|nonce) or AES256CTR(seed,nonce). 
+* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1_preinit(poly *a, stream256_state *state) { + /* PQCLEAN_DILITHIUM3_AVX2_polyz_unpack reads 14 additional bytes */ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14) buf; + stream256_squeezeblocks(buf.coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, state); + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(a, buf.coeffs); +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) { + stream256_state state; + stream256_init(&state, seed, nonce); + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1_preinit(a, &state); + stream256_release(&state); +} + +void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[48], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14) buf[4]; + keccakx4_state state; + __m256i f; + __m128i g; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + g = _mm_loadu_si128((__m128i *)&seed[32]); + _mm_store_si128((__m128i *)&buf[0].vec[1], g); + _mm_store_si128((__m128i *)&buf[1].vec[1], g); + _mm_store_si128((__m128i *)&buf[2].vec[1], g); + _mm_store_si128((__m128i *)&buf[3].vec[1], g); + + buf[0].coeffs[CRHBYTES + 0] = nonce0; + buf[0].coeffs[CRHBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[CRHBYTES + 0] = nonce1; + buf[1].coeffs[CRHBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[CRHBYTES + 0] = nonce2; + buf[2].coeffs[CRHBYTES + 1] = nonce2 >> 8; + 
buf[3].coeffs[CRHBYTES + 0] = nonce3; + buf[3].coeffs[CRHBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM3_AVX2_shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, CRHBYTES + 2); + PQCLEAN_DILITHIUM3_AVX2_shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(a0, buf[0].coeffs); + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(a1, buf[1].coeffs); + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(a2, buf[2].coeffs); + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(a3, buf[3].coeffs); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed). +* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t mu[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_poly_challenge(poly *restrict c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + ALIGNED_UINT8(SHAKE256_RATE) buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + + memcpy(&signs, buf.coeffs, 8); + pos = 8; + + memset(c->vec, 0, sizeof(poly)); + for (i = N - TAU; i < N; ++i) { + do { + if (pos >= SHAKE256_RATE) { + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + pos = 0; + } + + b = buf.coeffs[pos++]; + } while (b > i); + + c->coeffs[i] = c->coeffs[b]; + c->coeffs[b] = 1 - 2 * (signs & 1); + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_polyeta_pack +* +* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 2; ++i) { + t[0] = ETA - a->coeffs[2 * i + 0]; + t[1] = ETA - a->coeffs[2 * i + 1]; + r[i] = t[0] | (t[1] << 4); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(poly *restrict r, const uint8_t a[POLYETA_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 2; ++i) { + r->coeffs[2 * i + 0] = a[i] & 0x0F; + r->coeffs[2 * i + 1] = a[i] >> 4; + r->coeffs[2 * i + 0] = ETA - r->coeffs[2 * i + 0]; + r->coeffs[2 * i + 1] = ETA - r->coeffs[2 * i + 1]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r[5 * i + 0] = (a->coeffs[4 * i + 0] >> 0); + r[5 * i + 1] = (a->coeffs[4 * i + 0] >> 8) | (a->coeffs[4 * i + 1] << 2); + r[5 * i + 2] = (a->coeffs[4 * i + 1] >> 6) | (a->coeffs[4 * i + 2] << 4); + r[5 * i + 3] = (a->coeffs[4 * i + 2] >> 4) | (a->coeffs[4 * i + 3] << 6); + r[5 * i + 4] = (a->coeffs[4 * i + 3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are positive standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(poly *restrict r, const uint8_t a[POLYT1_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r->coeffs[4 * i + 0] = ((a[5 * i + 0] >> 0) | ((uint32_t)a[5 * i + 1] << 8)) & 0x3FF; + r->coeffs[4 * i + 1] = ((a[5 * i + 1] >> 2) | ((uint32_t)a[5 * i + 2] << 6)) & 0x3FF; + r->coeffs[4 * i + 2] = ((a[5 * i + 2] >> 4) | ((uint32_t)a[5 * i + 3] << 4)) & 0x3FF; + r->coeffs[4 * i + 3] = ((a[5 * i + 3] >> 6) | ((uint32_t)a[5 * i + 4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = (1 << (D - 1)) - a->coeffs[8 * i + 0]; + t[1] = (1 << (D - 1)) - a->coeffs[8 * i + 1]; + t[2] = (1 << (D - 1)) - a->coeffs[8 * i + 2]; + t[3] = (1 << (D - 1)) - a->coeffs[8 * i + 3]; + t[4] = (1 << (D - 1)) - a->coeffs[8 * i + 4]; + t[5] = (1 << (D - 1)) - a->coeffs[8 * i + 5]; + t[6] = (1 << (D - 1)) - a->coeffs[8 * i + 6]; + t[7] = (1 << (D - 1)) - a->coeffs[8 * i + 7]; + + r[13 * i + 0] = t[0]; + r[13 * i + 1] = t[0] >> 8; + r[13 * i + 1] |= t[1] << 5; + r[13 * i + 2] = t[1] >> 3; + r[13 * i + 3] = t[1] >> 11; + r[13 * i + 3] |= t[2] << 2; + r[13 * i + 4] = t[2] >> 6; + r[13 * i + 4] |= t[3] << 7; + r[13 * i + 5] = t[3] >> 1; + r[13 * i + 6] = t[3] >> 9; + r[13 * i + 6] |= t[4] << 4; + r[13 * i + 7] = t[4] >> 4; + r[13 * i + 8] = t[4] >> 12; + r[13 * i + 8] |= t[5] << 1; + r[13 * i + 9] = t[5] >> 7; + r[13 * i + 9] |= t[6] << 6; + r[13 * i + 10] = t[6] >> 2; + r[13 * i + 11] = t[6] >> 10; + r[13 * i + 11] |= t[7] << 3; + r[13 * i + 12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack(poly *restrict r, const uint8_t a[POLYT0_PACKEDBYTES]) {
+    DBENCH_START();
+
+    for (unsigned int grp = 0; grp < N / 8; ++grp) {
+        const uint8_t *in = &a[13 * grp];
+        int32_t *c = &r->coeffs[8 * grp];
+
+        /* Step 1: pull the eight 13-bit fields out of the 13-byte group. */
+        c[0] = (in[0] | ((uint32_t)in[1] << 8)) & 0x1FFF;
+        c[1] = ((in[1] >> 5) | ((uint32_t)in[2] << 3) | ((uint32_t)in[3] << 11)) & 0x1FFF;
+        c[2] = ((in[3] >> 2) | ((uint32_t)in[4] << 6)) & 0x1FFF;
+        c[3] = ((in[4] >> 7) | ((uint32_t)in[5] << 1) | ((uint32_t)in[6] << 9)) & 0x1FFF;
+        c[4] = ((in[6] >> 4) | ((uint32_t)in[7] << 4) | ((uint32_t)in[8] << 12)) & 0x1FFF;
+        c[5] = ((in[8] >> 1) | ((uint32_t)in[9] << 7)) & 0x1FFF;
+        c[6] = ((in[9] >> 6) | ((uint32_t)in[10] << 2) | ((uint32_t)in[11] << 10)) & 0x1FFF;
+        c[7] = ((in[11] >> 3) | ((uint32_t)in[12] << 5)) & 0x1FFF;
+
+        /* Step 2: undo the 2^{D-1} - c mapping applied when packing. */
+        for (unsigned int j = 0; j < 8; ++j) {
+            c[j] = (1 << (D - 1)) - c[j];
+        }
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyz_pack
+*
+* Description: Bit-pack polynomial with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1]. Each coefficient c is stored
+*              as GAMMA1 - c; two such values share five bytes.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYZ_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *restrict a) {
+    DBENCH_START();
+
+    for (unsigned int pair = 0; pair < N / 2; ++pair) {
+        const uint32_t z0 = GAMMA1 - a->coeffs[2 * pair + 0];
+        const uint32_t z1 = GAMMA1 - a->coeffs[2 * pair + 1];
+        uint8_t *out = &r[5 * pair];
+
+        out[0] = z0;
+        out[1] = z0 >> 8;
+        /* The middle byte holds the top of z0 and the low nibble of z1. */
+        out[2] = (z0 >> 16) | (z1 << 4);
+        out[3] = z1 >> 4;
+        out[4] = z1 >> 12;
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyz_unpack
+*
+* Description: Unpack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(poly *restrict r, const uint8_t a[POLYZ_PACKEDBYTES + 12]) {
+    /* Per 128-bit lane: move the three bytes covering each 20-bit field into
+     * its own 32-bit slot; index -1 clears the top byte of the slot. */
+    const __m256i gather = _mm256_set_epi8(-1, 11, 10, 9, -1, 9, 8, 7, -1, 6, 5, 4, -1, 4, 3, 2,
+                                           -1, 9, 8, 7, -1, 7, 6, 5, -1, 4, 3, 2, -1, 2, 1, 0);
+    /* Per-slot shift counts 0,4,0,4,...: odd fields start at bit 4 of their first byte. */
+    const __m256i align = _mm256_set1_epi64x((uint64_t)4 << 32);
+    const __m256i low20 = _mm256_set1_epi32(0xFFFFF);
+    const __m256i gamma1 = _mm256_set1_epi32(GAMMA1);
+    DBENCH_START();
+
+    for (unsigned int k = 0; k < N / 8; k++) {
+        /* Unaligned 32-byte load at a 20-byte stride. The last load therefore
+         * extends 12 bytes past the 20 * (N / 8) bytes that are consumed --
+         * presumably the reason for the "+ 12" in the parameter bound. */
+        const __m256i raw = _mm256_loadu_si256((__m256i *)&a[20 * k]);
+        /* 0x94 selects 64-bit quarters (0, 1, 1, 2): the upper lane sees bytes 8..23. */
+        const __m256i spread = _mm256_permute4x64_epi64(raw, 0x94);
+        const __m256i fields = _mm256_shuffle_epi8(spread, gather);
+        const __m256i packed = _mm256_and_si256(_mm256_srlv_epi32(fields, align), low20);
+
+        /* Coefficient = GAMMA1 - stored value. */
+        _mm256_store_si256(&r->vec[k], _mm256_sub_epi32(gamma1, packed));
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43].
+*              Input coefficients are assumed to be positive standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYW1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(uint8_t r[POLYW1_PACKEDBYTES], const poly *restrict a) {
+    unsigned int i;
+    __m256i f0, f1, f2, f3, f4, f5, f6, f7;
+    /* Byte pair (1, 16): maddubs computes lo * 1 + hi * 16, i.e. two 4-bit values per byte. */
+    const __m256i shift = _mm256_set1_epi16((16 << 8) + 1);
+    const __m256i shufbidx = _mm256_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0,
+                             15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0);
+    DBENCH_START();
+
+    /* Each iteration consumes 64 coefficients (8 vectors) and emits 32 bytes,
+     * so exactly 32 * (N / 64) bytes of r are written. */
+    for (i = 0; i < N / 64; ++i) {
+        f0 = _mm256_load_si256(&a->vec[8 * i + 0]);
+        f1 = _mm256_load_si256(&a->vec[8 * i + 1]);
+        f2 = _mm256_load_si256(&a->vec[8 * i + 2]);
+        f3 = _mm256_load_si256(&a->vec[8 * i + 3]);
+        f4 = _mm256_load_si256(&a->vec[8 * i + 4]);
+        f5 = _mm256_load_si256(&a->vec[8 * i + 5]);
+        f6 = _mm256_load_si256(&a->vec[8 * i + 6]);
+        f7 = _mm256_load_si256(&a->vec[8 * i + 7]);
+        /* Narrow 32-bit -> 16-bit -> 8-bit. */
+        f0 = _mm256_packus_epi32(f0, f1);
+        f1 = _mm256_packus_epi32(f2, f3);
+        f2 = _mm256_packus_epi32(f4, f5);
+        f3 = _mm256_packus_epi32(f6, f7);
+        f0 = _mm256_packus_epi16(f0, f1);
+        f1 = _mm256_packus_epi16(f2, f3);
+        /* Merge adjacent bytes into one packed value, then narrow again. */
+        f0 = _mm256_maddubs_epi16(f0, shift);
+        f1 = _mm256_maddubs_epi16(f1, shift);
+        f0 = _mm256_packus_epi16(f0, f1);
+        /* The pack instructions work per 128-bit lane; permute + shuffle reorder the bytes. */
+        f0 = _mm256_permute4x64_epi64(f0, 0xD8);
+        f0 = _mm256_shuffle_epi8(f0, shufbidx);
+        _mm256_storeu_si256((__m256i *)&r[32 * i], f0);
+    }
+
+    DBENCH_STOP(*tpack);
+}
diff --git a/crypto_sign/dilithium/dilithium3/avx2/poly.h b/crypto_sign/dilithium/dilithium3/avx2/poly.h
new file mode 100644
index 00000000..b424c625
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium3/avx2/poly.h
@@ -0,0 +1,79 @@
+#ifndef PQCLEAN_DILITHIUM3_AVX2_POLY_H
+#define PQCLEAN_DILITHIUM3_AVX2_POLY_H
+#include "align.h"
+#include "params.h"
+#include "symmetric.h"
+#include <stdint.h>
+
+typedef ALIGNED_INT32(N) poly;
+
+void PQCLEAN_DILITHIUM3_AVX2_poly_reduce(poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_poly_caddq(poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_poly_freeze(poly *a);
+
+void PQCLEAN_DILITHIUM3_AVX2_poly_add(poly *c, const poly *a, const poly *b);
+void PQCLEAN_DILITHIUM3_AVX2_poly_sub(poly *c, const poly *a, const poly *b);
+void PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(poly *a);
+
+void PQCLEAN_DILITHIUM3_AVX2_poly_ntt(poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b);
+
+void PQCLEAN_DILITHIUM3_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a);
+unsigned int PQCLEAN_DILITHIUM3_AVX2_poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1);
+void PQCLEAN_DILITHIUM3_AVX2_poly_use_hint(poly *b, const poly *a, const poly *h);
+
+int PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(const poly *a, int32_t B);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_preinit(poly *a, stream128_state *state);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_preinit(poly *a, stream128_state *state);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1_preinit(poly *a, stream256_state *state);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce);
+void PQCLEAN_DILITHIUM3_AVX2_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]);
+
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(poly *a0,
+        poly *a1,
+        poly *a2,
+        poly *a3,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce0,
+        uint16_t nonce1,
+        uint16_t nonce2,
+        uint16_t nonce3);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(poly *a0,
+        poly *a1,
+        poly *a2,
+        poly *a3,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce0,
+        uint16_t nonce1,
+        uint16_t nonce2,
+        uint16_t nonce3);
+void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1_4x(poly *a0,
+        poly *a1,
+        poly *a2,
+        poly *a3,
+        const uint8_t seed[CRHBYTES],
+        uint16_t nonce0,
+        uint16_t nonce1,
+        uint16_t nonce2,
+        uint16_t nonce3);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(poly *r, const uint8_t a[POLYETA_PACKEDBYTES]);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(poly *r, const uint8_t a[POLYT1_PACKEDBYTES]);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack(poly *r, const uint8_t a[POLYT0_PACKEDBYTES]);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *a);
+void PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(poly *r, const uint8_t a[POLYZ_PACKEDBYTES + 12]); /* bound matches the definition in poly.c */
+
+void PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(uint8_t r[POLYW1_PACKEDBYTES], const poly *a); /* bound matches the definition in poly.c */
+
+#endif
diff --git a/crypto_sign/dilithium/dilithium3/avx2/polyvec.c b/crypto_sign/dilithium/dilithium3/avx2/polyvec.c
new file mode 100644
index 00000000..10ccd9f8
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium3/avx2/polyvec.c
@@ -0,0 +1,498 @@
+#include "consts.h"
+#include "ntt.h"
+#include "params.h"
+#include "poly.h"
+#include "polyvec.h"
+#include <stdint.h>
+
+#define UNUSED(x) (void)(x)
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand
+*
+* Description: Implementation of ExpandA. Generates matrix A with uniformly
+*              random coefficients a_{i,j} by performing rejection
+*              sampling on the output stream of SHAKE128(rho|j|i)
+*              or AES256CTR(rho,j|i).
+*
+* Arguments:   - polyvecl mat[K]: output matrix
+*              - const uint8_t rho[]: byte array containing seed rho
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) {
+    /* Scratch row: row5 samples two polynomials beyond the last matrix row;
+     * they are written here and never read. */
+    polyvecl spill;
+
+    PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row0(&mat[0], &mat[1], rho);
+    PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row1(&mat[1], &mat[2], rho);
+    PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row2(&mat[2], &mat[3], rho);
+    /* row3 does not touch its second argument. */
+    PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row3(&mat[3], NULL, rho);
+    PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row4(&mat[4], &mat[5], rho);
+    PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row5(&mat[5], &spill, rho);
+}
+
+/*
+ * Row helpers. The sampler fills four polynomials per call, so a call may
+ * finish row `rowa` and already start the following row `rowb`. Nonces
+ * follow the pattern 256 * row + column.
+ */
+
+/* Samples all five entries of rowa (nonces 0..4) and entries 0..2 of rowb (256..258). */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) {
+    poly *const a = rowa->vec;
+    poly *const b = rowb->vec;
+
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[0], &a[1], &a[2], &a[3], rho, 0, 1, 2, 3);
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[4], &b[0], &b[1], &b[2], rho, 4, 256, 257, 258);
+    for (unsigned int k = 0; k < 5; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&a[k]);
+    }
+    for (unsigned int k = 0; k < 3; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&b[k]);
+    }
+}
+
+/* Samples entries 3..4 of rowa (nonces 259, 260) and entries 0..1 of rowb (512, 513). */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) {
+    poly *const a = rowa->vec;
+    poly *const b = rowb->vec;
+
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[3], &a[4], &b[0], &b[1], rho, 259, 260, 512, 513);
+    for (unsigned int k = 3; k < 5; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&a[k]);
+    }
+    for (unsigned int k = 0; k < 2; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&b[k]);
+    }
+}
+
+/* Samples entries 2..4 of rowa (nonces 514..516) and entry 0 of rowb (768). */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) {
+    poly *const a = rowa->vec;
+    poly *const b = rowb->vec;
+
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[2], &a[3], &a[4], &b[0], rho, 514, 515, 516, 768);
+    for (unsigned int k = 2; k < 5; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&a[k]);
+    }
+    PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&b[0]);
+}
+
+/* Samples entries 1..4 of rowa (nonces 769..772); rowb is ignored and may be NULL. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) {
+    poly *const a = rowa->vec;
+
+    UNUSED(rowb);
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[1], &a[2], &a[3], &a[4], rho, 769, 770, 771, 772);
+    for (unsigned int k = 1; k < 5; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&a[k]);
+    }
+}
+
+/* Samples all five entries of rowa (nonces 1024..1028) and entries 0..2 of rowb (1280..1282). */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) {
+    poly *const a = rowa->vec;
+    poly *const b = rowb->vec;
+
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[0], &a[1], &a[2], &a[3], rho, 1024, 1025, 1026, 1027);
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[4], &b[0], &b[1], &b[2], rho, 1028, 1280, 1281, 1282);
+    for (unsigned int k = 0; k < 5; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&a[k]);
+    }
+    for (unsigned int k = 0; k < 3; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&b[k]);
+    }
+}
+
+/* Samples entries 3..4 of rowa (nonces 1283, 1284). Entries 0..1 of rowb are
+ * also written (nonces 1536, 1537) but not passed through nttunpack. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) {
+    poly *const a = rowa->vec;
+    poly *const b = rowb->vec;
+
+    PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&a[3], &a[4], &b[0], &b[1], rho, 1283, 1284, 1536, 1537);
+    for (unsigned int k = 3; k < 5; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_nttunpack(&a[k]);
+    }
+}
+
+
+/* t[row] = accumulated pointwise product of mat[row] with v, for each of the K rows. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) {
+    for (unsigned int row = 0; row < K; ++row) {
+        PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_montgomery(&t->vec[row], &mat[row], v);
+    }
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length L **************/
+/**************************************************************/
+
+/* Calls poly_uniform_eta on each entry, using nonce, nonce + 1, ... in order. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    for (unsigned int k = 0; k < L; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(&v->vec[k], seed, nonce++);
+    }
+}
+
+/* Calls poly_uniform_gamma1 on entry k with nonce L * nonce + k.
+ * NOTE(review): poly_uniform_gamma1 is declared with seed[CRHBYTES]; the
+ * SEEDBYTES bound on this wrapper looks stale -- confirm against the callers. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    for (unsigned int k = 0; k < L; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1(&v->vec[k], seed, L * nonce + k);
+    }
+}
+
+/* Applies poly_reduce to each of the L polynomials in v. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_reduce(polyvecl *v) {
+    for (poly *p = v->vec; p != v->vec + L; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_reduce(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length L
+*              to standard representatives.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze(polyvecl *v) {
+    for (poly *p = v->vec; p != v->vec + L; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_freeze(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyvecl_add
+*
+* Description: Entry-wise sum w = u + v of two length-L vectors of
+*              polynomials. No modular reduction is performed.
+*
+* Arguments:   - polyvecl *w: pointer to output vector
+*              - const polyvecl *u: pointer to first summand
+*              - const polyvecl *v: pointer to second summand
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) {
+    for (unsigned int k = 0; k < L; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_add(&w->vec[k], &u->vec[k], &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length L. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(polyvecl *v) {
+    for (poly *p = v->vec; p != v->vec + L; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_ntt(p);
+    }
+}
+
+/* Applies poly_invntt_tomont to each of the L polynomials in v. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_invntt_tomont(polyvecl *v) {
+    for (poly *p = v->vec; p != v->vec + L; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(p);
+    }
+}
+
+/* r[k] = poly_pointwise_montgomery(a, v[k]) for each of the L entries. */
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) {
+    for (unsigned int k = 0; k < L; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(&r->vec[k], a, &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_montgomery
+*
+* Description: Pointwise multiply vectors of polynomials of length L, multiply
+*              resulting vector by 2^{-32} and add (accumulate) polynomials
+*              in it. Input/output vectors are in NTT domain representation.
+*              The work is delegated to the pointwise_acc_avx routine, which
+*              receives the vector storage of w, of the first polynomial of
+*              u and v, and of the qdata constants.
+*
+* Arguments:   - poly *w: output polynomial
+*              - const polyvecl *u: pointer to first input vector
+*              - const polyvecl *v: pointer to second input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u, const polyvecl *v) {
+    PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(w->vec,
+            u->vec[0].vec,
+            v->vec[0].vec,
+            PQCLEAN_DILITHIUM3_AVX2_qdata.vec);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length L.
+*              Assumes input polyvecl to be reduced by PQCLEAN_DILITHIUM3_AVX2_polyvecl_reduce().
+*
+* Arguments:   - const polyvecl *v: pointer to vector
+*              - int32_t B: norm bound
+*
+* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm(const polyvecl *v, int32_t bound) {
+    /* Stop at the first polynomial that fails the bound. */
+    for (const poly *p = v->vec; p != v->vec + L; ++p) {
+        if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(p, bound)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length K **************/
+/**************************************************************/
+
+/* Calls poly_uniform_eta on each entry, using nonce, nonce + 1, ... in order. */
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    for (unsigned int k = 0; k < K; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(&v->vec[k], seed, nonce++);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to representatives in [-6283009,6283007].
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce(polyveck *v) {
+    for (poly *p = v->vec; p != v->vec + K; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_reduce(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_caddq
+*
+* Description: For all coefficients of polynomials in vector of length K
+*              add Q if coefficient is negative.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_caddq(polyveck *v) {
+    for (poly *p = v->vec; p != v->vec + K; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_caddq(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to standard representatives.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze(polyveck *v) {
+    for (poly *p = v->vec; p != v->vec + K; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_freeze(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_add
+*
+* Description: Entry-wise sum w = u + v of two length-K vectors of
+*              polynomials. No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first summand
+*              - const polyveck *v: pointer to second summand
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) {
+    for (unsigned int k = 0; k < K; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_add(&w->vec[k], &u->vec[k], &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_sub
+*
+* Description: Entry-wise difference w = u - v of two length-K vectors of
+*              polynomials. No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first input vector
+*              - const polyveck *v: pointer to second input vector to be
+*                                   subtracted from first input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) {
+    for (unsigned int k = 0; k < K; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_sub(&w->vec[k], &u->vec[k], &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl
+*
+* Description: Multiply vector of polynomials of Length K by 2^D without modular
+*              reduction. Assumes input coefficients to be less than 2^{31-D}.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl(polyveck *v) {
+    for (poly *p = v->vec; p != v->vec + K; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length K. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(polyveck *v) {
+    for (poly *p = v->vec; p != v->vec + K; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_ntt(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_tomont
+*
+* Description: Inverse NTT and multiplication by 2^{32} of polynomials
+*              in vector of length K. Input coefficients need to be less
+*              than 2*Q.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_tomont(polyveck *v) {
+    for (poly *p = v->vec; p != v->vec + K; ++p) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(p);
+    }
+}
+
+/* r[k] = poly_pointwise_montgomery(a, v[k]) for each of the K entries. */
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) {
+    for (unsigned int k = 0; k < K; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(&r->vec[k], a, &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length K.
+*              Assumes input polyveck to be reduced by PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce().
+*
+* Arguments:   - const polyveck *v: pointer to vector
+*              - int32_t bound: norm bound B
+*
+* Returns 0 if the norm of every polynomial is strictly smaller than B <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm(const polyveck *v, int32_t bound) {
+    /* Stop at the first polynomial that fails the bound. */
+    for (const poly *p = v->vec; p != v->vec + K; ++p) {
+        if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(p, bound)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute a0, a1 such that a mod^+ Q = a1*2^D + a0
+*              with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be
+*              standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) {
+    for (unsigned int k = 0; k < K; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_power2round(&v1->vec[k], &v0->vec[k], &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute high and low bits a0, a1 such that a mod^+ Q = a1*ALPHA + a0
+*              with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we
+*              set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0.
+*              Assumes coefficients to be standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) {
+    for (unsigned int k = 0; k < K; ++k) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_decompose(&v1->vec[k], &v0->vec[k], &v->vec[k]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint
+*
+* Description: Compute hint vector. The per-polynomial results are written
+*              back to back: each call to poly_make_hint starts at the
+*              offset reached by the previous ones.
+*
+* Arguments:   - uint8_t *hint: pointer to output hint array
+*              - const polyveck *v0: pointer to low part of input vector
+*              - const polyveck *v1: pointer to high part of input vector
+*
+* Returns number of 1 bits.
+**************************************************/
+unsigned int PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1) {
+    unsigned int total = 0;
+
+    for (unsigned int k = 0; k < K; ++k) {
+        total += PQCLEAN_DILITHIUM3_AVX2_poly_make_hint(hint + total, &v0->vec[k], &v1->vec[k]);
+    }
+
+    return total;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_AVX2_polyveck_use_hint
+*
+* Description: Use hint vector to correct the high bits of input vector.
+*
+* Arguments:   - polyveck *w: pointer to output vector of polynomials with
+*                             corrected high bits
+*              - const polyveck *u: pointer to input vector
+*              - const polyveck *h: pointer to input hint vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM3_AVX2_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]);
+    }
+}
+
+/* Packs each of the K polynomials of w1 into its own POLYW1_PACKEDBYTES-byte slice of r. */
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(&r[i * POLYW1_PACKEDBYTES], &w1->vec[i]);
+    }
+}
diff --git a/crypto_sign/dilithium/dilithium3/avx2/polyvec.h b/crypto_sign/dilithium/dilithium3/avx2/polyvec.h
new file mode 100644
index 00000000..983e13f0
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium3/avx2/polyvec.h
@@ -0,0 +1,72 @@
+#ifndef PQCLEAN_DILITHIUM3_AVX2_POLYVEC_H
+#define PQCLEAN_DILITHIUM3_AVX2_POLYVEC_H
+#include "params.h"
+#include "poly.h"
+#include <stdint.h>
+
+/* Vectors of polynomials of length L */
+typedef struct {
+    poly vec[L];
+} polyvecl;
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_reduce(polyvecl *v);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze(polyvecl *v);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(polyvecl *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_invntt_tomont(polyvecl *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_montgomery(poly *w,
+        const polyvecl *u,
+        const polyvecl *v);
+
+int PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm(const polyvecl *v, int32_t B);
+
+/* Vectors of polynomials of length K */
+typedef struct {
+    poly vec[K];
+} polyveck;
+
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce(polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_caddq(polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze(polyveck *v);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl(polyveck *v);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_tomont(polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v);
+
+int PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm(const polyveck *v, int32_t B);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v);
+unsigned int PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1);
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]);
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]);
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]);
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]);
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]);
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]);
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]);
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); /* NOTE(review): no definition in polyvec.c -- confirm */
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); /* NOTE(review): no definition in polyvec.c -- confirm */
+
+void PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v);
+
+#endif
diff --git a/crypto_sign/dilithium/dilithium3/avx2/rejsample.c b/crypto_sign/dilithium/dilithium3/avx2/rejsample.c
new file mode 100644
index 00000000..a70674c2
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium3/avx2/rejsample.c
@@ -0,0 +1,392 @@
+#include "params.h"
+#include "rejsample.h"
+#include "symmetric.h"
+#include <immintrin.h>
+#include <stdint.h>
+
+const uint8_t PQCLEAN_DILITHIUM3_AVX2_idxlut[256][8] = {
+    { 0,  0,  0,  0,  0,  0,  0,  0},
+    { 0,  0,  0,  0,  0,  0,  0,  0},
+    { 1,  0,  0,  0,  0,  0,  0,  0},
+    { 0,  1,  0,  0,  0,  0,  0,  0},
+    { 2,  0,  0,  0,  0,  0,  0,  0},
+    { 0,  2,  0,  0,  0,  0,  0,  0},
+    { 1,  2,  0,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  0,  0,  0,  0,  0},
+    { 3,  0,  0,  0,  0,  0,  0,  0},
+    { 0,  3,  0,  0,  0,  0,  0,  0},
+    { 1,  3,  0,  0,  0,  0,  0,  0},
+    { 0,  1,  3,  0,  0,  0,  0,  0},
+    { 2,  3,  0,  0,  0,  0,  0,  0},
+    { 0,  2,  3,  0,  0,  0,  0,  0},
+    { 1,  2,  3,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  3,  0,  0,  0,  0},
+    { 4,  0,  0,  0,  0,  0,  0,  0},
+    { 0,  4,  0,  0,  0,  0,  0,  0},
+    { 1,  4,  0,  0,  0,  0,  0,  0},
+    { 0,  1,  4,  0,  0,  0,  0,  0},
+    { 2,  4,  0,  0,  0,  0,  0,  0},
+    { 0,  2,  4,  0,  0,  0,  0,  0},
+    { 1,  2,  4,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  4,  0,  0,  0,  0},
+    { 3,  4,  0,  0,  0,  0,  0,  0},
+    { 0,  3,  4,  0,  0,  0,  0,  0},
+    { 1,  3,  4,  0,  0,  0,  0,  0},
+    { 0,  1,  3,  4,  0,  0,  0,  0},
+    { 2,  3,  4,  0,  0,  0,  0,  0},
+
{ 0, 2, 3, 4, 0, 0, 0, 0}, + { 1, 2, 3, 4, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 0, 0, 0}, + { 5, 0, 0, 0, 0, 0, 0, 0}, + { 0, 5, 0, 0, 0, 0, 0, 0}, + { 1, 5, 0, 0, 0, 0, 0, 0}, + { 0, 1, 5, 0, 0, 0, 0, 0}, + { 2, 5, 0, 0, 0, 0, 0, 0}, + { 0, 2, 5, 0, 0, 0, 0, 0}, + { 1, 2, 5, 0, 0, 0, 0, 0}, + { 0, 1, 2, 5, 0, 0, 0, 0}, + { 3, 5, 0, 0, 0, 0, 0, 0}, + { 0, 3, 5, 0, 0, 0, 0, 0}, + { 1, 3, 5, 0, 0, 0, 0, 0}, + { 0, 1, 3, 5, 0, 0, 0, 0}, + { 2, 3, 5, 0, 0, 0, 0, 0}, + { 0, 2, 3, 5, 0, 0, 0, 0}, + { 1, 2, 3, 5, 0, 0, 0, 0}, + { 0, 1, 2, 3, 5, 0, 0, 0}, + { 4, 5, 0, 0, 0, 0, 0, 0}, + { 0, 4, 5, 0, 0, 0, 0, 0}, + { 1, 4, 5, 0, 0, 0, 0, 0}, + { 0, 1, 4, 5, 0, 0, 0, 0}, + { 2, 4, 5, 0, 0, 0, 0, 0}, + { 0, 2, 4, 5, 0, 0, 0, 0}, + { 1, 2, 4, 5, 0, 0, 0, 0}, + { 0, 1, 2, 4, 5, 0, 0, 0}, + { 3, 4, 5, 0, 0, 0, 0, 0}, + { 0, 3, 4, 5, 0, 0, 0, 0}, + { 1, 3, 4, 5, 0, 0, 0, 0}, + { 0, 1, 3, 4, 5, 0, 0, 0}, + { 2, 3, 4, 5, 0, 0, 0, 0}, + { 0, 2, 3, 4, 5, 0, 0, 0}, + { 1, 2, 3, 4, 5, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 0, 0}, + { 6, 0, 0, 0, 0, 0, 0, 0}, + { 0, 6, 0, 0, 0, 0, 0, 0}, + { 1, 6, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 0, 0, 0, 0, 0}, + { 2, 6, 0, 0, 0, 0, 0, 0}, + { 0, 2, 6, 0, 0, 0, 0, 0}, + { 1, 2, 6, 0, 0, 0, 0, 0}, + { 0, 1, 2, 6, 0, 0, 0, 0}, + { 3, 6, 0, 0, 0, 0, 0, 0}, + { 0, 3, 6, 0, 0, 0, 0, 0}, + { 1, 3, 6, 0, 0, 0, 0, 0}, + { 0, 1, 3, 6, 0, 0, 0, 0}, + { 2, 3, 6, 0, 0, 0, 0, 0}, + { 0, 2, 3, 6, 0, 0, 0, 0}, + { 1, 2, 3, 6, 0, 0, 0, 0}, + { 0, 1, 2, 3, 6, 0, 0, 0}, + { 4, 6, 0, 0, 0, 0, 0, 0}, + { 0, 4, 6, 0, 0, 0, 0, 0}, + { 1, 4, 6, 0, 0, 0, 0, 0}, + { 0, 1, 4, 6, 0, 0, 0, 0}, + { 2, 4, 6, 0, 0, 0, 0, 0}, + { 0, 2, 4, 6, 0, 0, 0, 0}, + { 1, 2, 4, 6, 0, 0, 0, 0}, + { 0, 1, 2, 4, 6, 0, 0, 0}, + { 3, 4, 6, 0, 0, 0, 0, 0}, + { 0, 3, 4, 6, 0, 0, 0, 0}, + { 1, 3, 4, 6, 0, 0, 0, 0}, + { 0, 1, 3, 4, 6, 0, 0, 0}, + { 2, 3, 4, 6, 0, 0, 0, 0}, + { 0, 2, 3, 4, 6, 0, 0, 0}, + { 1, 2, 3, 4, 6, 0, 0, 0}, + { 0, 1, 2, 3, 4, 6, 0, 0}, + { 5, 6, 0, 0, 0, 0, 0, 0}, + { 0, 5, 6, 0, 0, 0, 0, 0}, 
+ { 1, 5, 6, 0, 0, 0, 0, 0}, + { 0, 1, 5, 6, 0, 0, 0, 0}, + { 2, 5, 6, 0, 0, 0, 0, 0}, + { 0, 2, 5, 6, 0, 0, 0, 0}, + { 1, 2, 5, 6, 0, 0, 0, 0}, + { 0, 1, 2, 5, 6, 0, 0, 0}, + { 3, 5, 6, 0, 0, 0, 0, 0}, + { 0, 3, 5, 6, 0, 0, 0, 0}, + { 1, 3, 5, 6, 0, 0, 0, 0}, + { 0, 1, 3, 5, 6, 0, 0, 0}, + { 2, 3, 5, 6, 0, 0, 0, 0}, + { 0, 2, 3, 5, 6, 0, 0, 0}, + { 1, 2, 3, 5, 6, 0, 0, 0}, + { 0, 1, 2, 3, 5, 6, 0, 0}, + { 4, 5, 6, 0, 0, 0, 0, 0}, + { 0, 4, 5, 6, 0, 0, 0, 0}, + { 1, 4, 5, 6, 0, 0, 0, 0}, + { 0, 1, 4, 5, 6, 0, 0, 0}, + { 2, 4, 5, 6, 0, 0, 0, 0}, + { 0, 2, 4, 5, 6, 0, 0, 0}, + { 1, 2, 4, 5, 6, 0, 0, 0}, + { 0, 1, 2, 4, 5, 6, 0, 0}, + { 3, 4, 5, 6, 0, 0, 0, 0}, + { 0, 3, 4, 5, 6, 0, 0, 0}, + { 1, 3, 4, 5, 6, 0, 0, 0}, + { 0, 1, 3, 4, 5, 6, 0, 0}, + { 2, 3, 4, 5, 6, 0, 0, 0}, + { 0, 2, 3, 4, 5, 6, 0, 0}, + { 1, 2, 3, 4, 5, 6, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 0}, + { 7, 0, 0, 0, 0, 0, 0, 0}, + { 0, 7, 0, 0, 0, 0, 0, 0}, + { 1, 7, 0, 0, 0, 0, 0, 0}, + { 0, 1, 7, 0, 0, 0, 0, 0}, + { 2, 7, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 0, 0, 0, 0, 0}, + { 1, 2, 7, 0, 0, 0, 0, 0}, + { 0, 1, 2, 7, 0, 0, 0, 0}, + { 3, 7, 0, 0, 0, 0, 0, 0}, + { 0, 3, 7, 0, 0, 0, 0, 0}, + { 1, 3, 7, 0, 0, 0, 0, 0}, + { 0, 1, 3, 7, 0, 0, 0, 0}, + { 2, 3, 7, 0, 0, 0, 0, 0}, + { 0, 2, 3, 7, 0, 0, 0, 0}, + { 1, 2, 3, 7, 0, 0, 0, 0}, + { 0, 1, 2, 3, 7, 0, 0, 0}, + { 4, 7, 0, 0, 0, 0, 0, 0}, + { 0, 4, 7, 0, 0, 0, 0, 0}, + { 1, 4, 7, 0, 0, 0, 0, 0}, + { 0, 1, 4, 7, 0, 0, 0, 0}, + { 2, 4, 7, 0, 0, 0, 0, 0}, + { 0, 2, 4, 7, 0, 0, 0, 0}, + { 1, 2, 4, 7, 0, 0, 0, 0}, + { 0, 1, 2, 4, 7, 0, 0, 0}, + { 3, 4, 7, 0, 0, 0, 0, 0}, + { 0, 3, 4, 7, 0, 0, 0, 0}, + { 1, 3, 4, 7, 0, 0, 0, 0}, + { 0, 1, 3, 4, 7, 0, 0, 0}, + { 2, 3, 4, 7, 0, 0, 0, 0}, + { 0, 2, 3, 4, 7, 0, 0, 0}, + { 1, 2, 3, 4, 7, 0, 0, 0}, + { 0, 1, 2, 3, 4, 7, 0, 0}, + { 5, 7, 0, 0, 0, 0, 0, 0}, + { 0, 5, 7, 0, 0, 0, 0, 0}, + { 1, 5, 7, 0, 0, 0, 0, 0}, + { 0, 1, 5, 7, 0, 0, 0, 0}, + { 2, 5, 7, 0, 0, 0, 0, 0}, + { 0, 2, 5, 7, 0, 0, 0, 0}, + { 1, 2, 5, 7, 0, 0, 0, 
0}, + { 0, 1, 2, 5, 7, 0, 0, 0}, + { 3, 5, 7, 0, 0, 0, 0, 0}, + { 0, 3, 5, 7, 0, 0, 0, 0}, + { 1, 3, 5, 7, 0, 0, 0, 0}, + { 0, 1, 3, 5, 7, 0, 0, 0}, + { 2, 3, 5, 7, 0, 0, 0, 0}, + { 0, 2, 3, 5, 7, 0, 0, 0}, + { 1, 2, 3, 5, 7, 0, 0, 0}, + { 0, 1, 2, 3, 5, 7, 0, 0}, + { 4, 5, 7, 0, 0, 0, 0, 0}, + { 0, 4, 5, 7, 0, 0, 0, 0}, + { 1, 4, 5, 7, 0, 0, 0, 0}, + { 0, 1, 4, 5, 7, 0, 0, 0}, + { 2, 4, 5, 7, 0, 0, 0, 0}, + { 0, 2, 4, 5, 7, 0, 0, 0}, + { 1, 2, 4, 5, 7, 0, 0, 0}, + { 0, 1, 2, 4, 5, 7, 0, 0}, + { 3, 4, 5, 7, 0, 0, 0, 0}, + { 0, 3, 4, 5, 7, 0, 0, 0}, + { 1, 3, 4, 5, 7, 0, 0, 0}, + { 0, 1, 3, 4, 5, 7, 0, 0}, + { 2, 3, 4, 5, 7, 0, 0, 0}, + { 0, 2, 3, 4, 5, 7, 0, 0}, + { 1, 2, 3, 4, 5, 7, 0, 0}, + { 0, 1, 2, 3, 4, 5, 7, 0}, + { 6, 7, 0, 0, 0, 0, 0, 0}, + { 0, 6, 7, 0, 0, 0, 0, 0}, + { 1, 6, 7, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 0, 0, 0, 0}, + { 2, 6, 7, 0, 0, 0, 0, 0}, + { 0, 2, 6, 7, 0, 0, 0, 0}, + { 1, 2, 6, 7, 0, 0, 0, 0}, + { 0, 1, 2, 6, 7, 0, 0, 0}, + { 3, 6, 7, 0, 0, 0, 0, 0}, + { 0, 3, 6, 7, 0, 0, 0, 0}, + { 1, 3, 6, 7, 0, 0, 0, 0}, + { 0, 1, 3, 6, 7, 0, 0, 0}, + { 2, 3, 6, 7, 0, 0, 0, 0}, + { 0, 2, 3, 6, 7, 0, 0, 0}, + { 1, 2, 3, 6, 7, 0, 0, 0}, + { 0, 1, 2, 3, 6, 7, 0, 0}, + { 4, 6, 7, 0, 0, 0, 0, 0}, + { 0, 4, 6, 7, 0, 0, 0, 0}, + { 1, 4, 6, 7, 0, 0, 0, 0}, + { 0, 1, 4, 6, 7, 0, 0, 0}, + { 2, 4, 6, 7, 0, 0, 0, 0}, + { 0, 2, 4, 6, 7, 0, 0, 0}, + { 1, 2, 4, 6, 7, 0, 0, 0}, + { 0, 1, 2, 4, 6, 7, 0, 0}, + { 3, 4, 6, 7, 0, 0, 0, 0}, + { 0, 3, 4, 6, 7, 0, 0, 0}, + { 1, 3, 4, 6, 7, 0, 0, 0}, + { 0, 1, 3, 4, 6, 7, 0, 0}, + { 2, 3, 4, 6, 7, 0, 0, 0}, + { 0, 2, 3, 4, 6, 7, 0, 0}, + { 1, 2, 3, 4, 6, 7, 0, 0}, + { 0, 1, 2, 3, 4, 6, 7, 0}, + { 5, 6, 7, 0, 0, 0, 0, 0}, + { 0, 5, 6, 7, 0, 0, 0, 0}, + { 1, 5, 6, 7, 0, 0, 0, 0}, + { 0, 1, 5, 6, 7, 0, 0, 0}, + { 2, 5, 6, 7, 0, 0, 0, 0}, + { 0, 2, 5, 6, 7, 0, 0, 0}, + { 1, 2, 5, 6, 7, 0, 0, 0}, + { 0, 1, 2, 5, 6, 7, 0, 0}, + { 3, 5, 6, 7, 0, 0, 0, 0}, + { 0, 3, 5, 6, 7, 0, 0, 0}, + { 1, 3, 5, 6, 7, 0, 0, 0}, + { 0, 1, 3, 5, 6, 7, 
0, 0}, + { 2, 3, 5, 6, 7, 0, 0, 0}, + { 0, 2, 3, 5, 6, 7, 0, 0}, + { 1, 2, 3, 5, 6, 7, 0, 0}, + { 0, 1, 2, 3, 5, 6, 7, 0}, + { 4, 5, 6, 7, 0, 0, 0, 0}, + { 0, 4, 5, 6, 7, 0, 0, 0}, + { 1, 4, 5, 6, 7, 0, 0, 0}, + { 0, 1, 4, 5, 6, 7, 0, 0}, + { 2, 4, 5, 6, 7, 0, 0, 0}, + { 0, 2, 4, 5, 6, 7, 0, 0}, + { 1, 2, 4, 5, 6, 7, 0, 0}, + { 0, 1, 2, 4, 5, 6, 7, 0}, + { 3, 4, 5, 6, 7, 0, 0, 0}, + { 0, 3, 4, 5, 6, 7, 0, 0}, + { 1, 3, 4, 5, 6, 7, 0, 0}, + { 0, 1, 3, 4, 5, 6, 7, 0}, + { 2, 3, 4, 5, 6, 7, 0, 0}, + { 0, 2, 3, 4, 5, 6, 7, 0}, + { 1, 2, 3, 4, 5, 6, 7, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7} +}; + +unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(int32_t *restrict r, const uint8_t buf[REJ_UNIFORM_BUFLEN + 8]) { /* Rejection-samples 23-bit values below Q from buf into r; returns the number of coefficients written (at most N). */ + unsigned int ctr, pos; + uint32_t good; + __m256i d, tmp; + const __m256i bound = _mm256_set1_epi32(Q); + const __m256i mask = _mm256_set1_epi32(0x7FFFFF); + const __m256i idx8 = _mm256_set_epi8(-1, 15, 14, 13, -1, 12, 11, 10, + -1, 9, 8, 7, -1, 6, 5, 4, + -1, 11, 10, 9, -1, 8, 7, 6, + -1, 5, 4, 3, -1, 2, 1, 0); + + ctr = pos = 0; + while (pos <= REJ_UNIFORM_BUFLEN - 24) { + d = _mm256_loadu_si256((const __m256i *)&buf[pos]); /* 32-byte load; only 24 bytes are consumed per round */ + d = _mm256_permute4x64_epi64(d, 0x94); + d = _mm256_shuffle_epi8(d, idx8); + d = _mm256_and_si256(d, mask); + pos += 24; /* 8 candidates of 3 bytes each */ + + tmp = _mm256_sub_epi32(d, bound); + good = _mm256_movemask_ps(_mm256_castsi256_ps(tmp)); /* sign bits of d - Q: bit set iff the candidate is below Q */ + tmp = _mm256_cvtepu8_epi32(_mm_loadl_epi64((const __m128i *)&PQCLEAN_DILITHIUM3_AVX2_idxlut[good])); + d = _mm256_permutevar8x32_epi32(d, tmp); /* move the accepted lanes to the front */ + + _mm256_storeu_si256((__m256i *)&r[ctr], d); + ctr += _mm_popcnt_u32(good); + + if (ctr > N - 8) { + break; + } + } + + uint32_t t; + while (ctr < N && pos <= REJ_UNIFORM_BUFLEN - 3) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + r[ctr++] = t; + } + } + + return ctr; +} + +unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(int32_t *restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + 
__m256i f0, f1; + __m128i g0, g1; + const __m256i mask = _mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(4); + const __m256i bound = _mm256_set1_epi8(9); + + ctr = pos = 0; + while (ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0, 4); + f0 = _mm256_or_si256(f0, f1); + f0 = _mm256_and_si256(f0, mask); + + f1 = _mm256_sub_epi8(f0, bound); + f0 = _mm256_sub_epi8(eta, f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM3_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm_bsrli_si128(g0, 8); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM3_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm256_extracti128_si256(f0, 1); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM3_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm_bsrli_si128(g0, 8); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM3_AVX2_idxlut[good]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while (ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if (t0 < 9) { + r[ctr++] = 4 - t0; + } + if (t1 < 9 && ctr < N) { + r[ctr++] = 4 - t1; + } + } + 
+ return ctr; +} diff --git a/crypto_sign/dilithium/dilithium3/avx2/rejsample.h b/crypto_sign/dilithium/dilithium3/avx2/rejsample.h new file mode 100644 index 00000000..9012ae97 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/rejsample.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_REJSAMPLE_H +#define PQCLEAN_DILITHIUM3_AVX2_REJSAMPLE_H +#include "params.h" +#include "symmetric.h" +#include + +#define REJ_UNIFORM_NBLOCKS ((768+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_BUFLEN (REJ_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES) + +#define REJ_UNIFORM_ETA_NBLOCKS ((228+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_ETA_BUFLEN (REJ_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) + +extern const uint8_t PQCLEAN_DILITHIUM3_AVX2_idxlut[256][8]; + +unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_uniform_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN + 8]); /* 8 spare bytes: the sampler reads buf with 32-byte vector loads */ + +unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_eta_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]); /* bound matches the definition in rejsample.c */ + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/rounding.c b/crypto_sign/dilithium/dilithium3/avx2/rounding.c new file mode 100644 index 00000000..1fbe15f5 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/rounding.c @@ -0,0 +1,154 @@ +#include "consts.h" +#include "params.h" +#include "rejsample.h" +#include "rounding.h" +#include +#include +#include + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: power2round +* +* Description: For finite field elements a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be positive standard representative. 
+* +* Arguments: - __m256i *a1: output array of length N/8 with high bits +* - __m256i *a0: output array of length N/8 with low bits a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) { + unsigned int i; + __m256i f, f0, f1; + const __m256i mask = _mm256_set1_epi32(-(1 << D)); + const __m256i half = _mm256_set1_epi32((1 << (D - 1)) - 1); + + for (i = 0; i < N / 8; ++i) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f, half); + f0 = _mm256_and_si256(f1, mask); + f1 = _mm256_srli_epi32(f1, D); + f0 = _mm256_sub_epi32(f, f0); + _mm256_store_si256(&a1[i], f1); + _mm256_store_si256(&a0[i], f0); + } +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low parts a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard +* representative. 
+* +* Arguments: - __m256i *a1: output array of length N/8 with high parts +* - __m256i *a0: output array of length N/8 with low parts a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) { + unsigned int i; + __m256i f, f0, f1; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM3_AVX2_qdata.vec[_8XQ / 8]); + const __m256i hq = _mm256_srli_epi32(q, 1); + const __m256i v = _mm256_set1_epi32(1025); + const __m256i alpha = _mm256_set1_epi32(2 * GAMMA2); + const __m256i off = _mm256_set1_epi32(127); + const __m256i shift = _mm256_set1_epi32(512); + const __m256i mask = _mm256_set1_epi32(15); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f, off); + f1 = _mm256_srli_epi32(f1, 7); + f1 = _mm256_mulhi_epu16(f1, v); + f1 = _mm256_mulhrs_epi16(f1, shift); + f1 = _mm256_and_si256(f1, mask); + f0 = _mm256_mullo_epi32(f1, alpha); + f0 = _mm256_sub_epi32(f, f0); + f = _mm256_cmpgt_epi32(f0, hq); + f = _mm256_and_si256(f, q); + f0 = _mm256_sub_epi32(f0, f); + _mm256_store_si256(&a1[i], f1); + _mm256_store_si256(&a0[i], f0); + } +} + + +/************************************************* +* Name: make_hint +* +* Description: Compute indices of polynomial coefficients whose low bits +* overflow into the high bits. 
+* +* Arguments: - uint8_t *hint: hint array +* - const __m256i *a0: low bits of input elements +* - const __m256i *a1: high bits of input elements +* +* Returns number of overflowing low bits +**************************************************/ +unsigned int PQCLEAN_DILITHIUM3_AVX2_make_hint_avx(uint8_t hint[N], const __m256i *restrict a0, const __m256i *restrict a1) { + unsigned int i, n = 0; + __m256i f0, f1, g0, g1; + uint32_t bad; + uint64_t idx; + const __m256i low = _mm256_set1_epi32(-GAMMA2); + const __m256i high = _mm256_set1_epi32(GAMMA2); + + for (i = 0; i < N / 8; ++i) { + f0 = _mm256_load_si256(&a0[i]); + f1 = _mm256_load_si256(&a1[i]); + g0 = _mm256_abs_epi32(f0); + g0 = _mm256_cmpgt_epi32(g0, high); + g1 = _mm256_cmpeq_epi32(f0, low); + g1 = _mm256_sign_epi32(g1, f1); + g0 = _mm256_or_si256(g0, g1); + + bad = _mm256_movemask_ps((__m256)g0); + memcpy(&idx, PQCLEAN_DILITHIUM3_AVX2_idxlut[bad], 8); + idx += (uint64_t)0x0808080808080808 * i; + memcpy(&hint[n], &idx, 8); + n += _mm_popcnt_u32(bad); + } + + return n; +} + +/************************************************* +* Name: use_hint +* +* Description: Correct high parts according to hint. 
+* +* Arguments: - __m256i *b: output array of length N/8 with corrected high parts +* - const __m256i *a: input array of length N/8 +* - const __m256i *hint: input array of length N/8 with hint bits +* +**************************************************/ +void PQCLEAN_DILITHIUM3_AVX2_use_hint_avx(__m256i *b, const __m256i *a, const __m256i *restrict hint) { + unsigned int i; + __m256i a0[N / 8]; + __m256i f, g, h, t; + const __m256i zero = _mm256_setzero_si256(); + const __m256i mask = _mm256_set1_epi32(15); + + PQCLEAN_DILITHIUM3_AVX2_decompose_avx(b, a0, a); /* b = high parts, a0 = low parts of a */ + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a0[i]); + g = _mm256_load_si256(&b[i]); + h = _mm256_load_si256(&hint[i]); + t = _mm256_blendv_epi32(zero, h, f); /* t = h in lanes where the low part is negative, else 0 */ + t = _mm256_slli_epi32(t, 1); + h = _mm256_sub_epi32(h, t); /* h - 2t: the hint becomes -h where the low part is negative */ + g = _mm256_add_epi32(g, h); + g = _mm256_and_si256(g, mask); /* wrap the corrected high part modulo 16 */ + _mm256_store_si256(&b[i], g); + } +} diff --git a/crypto_sign/dilithium/dilithium3/avx2/rounding.h b/crypto_sign/dilithium/dilithium3/avx2/rounding.h new file mode 100644 index 00000000..69a15900 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/rounding.h @@ -0,0 +1,12 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_ROUNDING_H +#define PQCLEAN_DILITHIUM3_AVX2_ROUNDING_H +#include "params.h" +#include +#include + +void PQCLEAN_DILITHIUM3_AVX2_power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a); +void PQCLEAN_DILITHIUM3_AVX2_decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a); +unsigned int PQCLEAN_DILITHIUM3_AVX2_make_hint_avx(uint8_t hint[N], const __m256i *a0, const __m256i *a1); +void PQCLEAN_DILITHIUM3_AVX2_use_hint_avx(__m256i *b, const __m256i *a, const __m256i *hint); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/shuffle.S b/crypto_sign/dilithium/dilithium3/avx2/shuffle.S new file mode 100644 index 00000000..e81f2486 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/shuffle.S @@ -0,0 +1,54 @@ +#include "cdecl.h" +.include "shuffle.inc" + +.text +nttunpack128_avx: +#load +vmovdqa 
(%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +#store +vmovdqa %ymm9,(%rdi) +vmovdqa %ymm8,32(%rdi) +vmovdqa %ymm7,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm5,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm3,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(PQCLEAN_DILITHIUM3_AVX2_nttunpack_avx) +.global _cdecl(PQCLEAN_DILITHIUM3_AVX2_nttunpack_avx) +cdecl(PQCLEAN_DILITHIUM3_AVX2_nttunpack_avx): +_cdecl(PQCLEAN_DILITHIUM3_AVX2_nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret diff --git a/crypto_sign/dilithium/dilithium3/avx2/shuffle.inc b/crypto_sign/dilithium/dilithium3/avx2/shuffle.inc new file mode 100644 index 00000000..73e9ffe0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/crypto_sign/dilithium/dilithium3/avx2/sign.c b/crypto_sign/dilithium/dilithium3/avx2/sign.c new file mode 100644 index 
00000000..f746e8f8 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/sign.c @@ -0,0 +1,425 @@ +#include "align.h" +#include "fips202.h" +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "sign.h" +#include "symmetric.h" +#include +#include + +static inline void polyvec_matrix_expand_row(polyvecl **row, polyvecl buf[2], const uint8_t rho[SEEDBYTES], unsigned int i) { + switch (i) { + case 0: + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row0(buf, buf + 1, rho); + *row = buf; + break; + case 1: + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row1(buf + 1, buf, rho); + *row = buf + 1; + break; + case 2: + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row2(buf, buf + 1, rho); + *row = buf; + break; + case 3: + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row3(buf + 1, buf, rho); + *row = buf + 1; + break; + case 4: + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row4(buf, buf + 1, rho); + *row = buf; + break; + case 5: + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand_row5(buf + 1, buf, rho); + *row = buf + 1; + break; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + unsigned int i; + uint8_t seedbuf[3 * SEEDBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl rowbuf[2]; + polyvecl s1, *row = rowbuf; + polyveck s2; + poly t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 3 * SEEDBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = seedbuf + SEEDBYTES; + key = seedbuf + 2 * SEEDBYTES; + + /* Store rho, key */ + memcpy(pk, rho, SEEDBYTES); + memcpy(sk, rho, SEEDBYTES); + memcpy(sk + SEEDBYTES, key, SEEDBYTES); + + /* Sample short vectors s1 and s2 */ + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(&s1.vec[4], &s2.vec[0], &s2.vec[1], &s2.vec[2], rhoprime, 4, 5, 6, 7); + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(&s2.vec[3], &s2.vec[4], &s2.vec[5], &t0, rhoprime, 8, 9, 10, 11); + + /* Pack secret vectors */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(sk + 2 * SEEDBYTES + CRHBYTES + i * POLYETA_PACKEDBYTES, &s1.vec[i]); + } + for (i = 0; i < K; i++) { + PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(sk + 2 * SEEDBYTES + CRHBYTES + (L + i)*POLYETA_PACKEDBYTES, &s2.vec[i]); + } + + /* Transform s1 */ + PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&s1); + + + for (i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, rho, i); + + /* Compute inner-product */ + PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_montgomery(&t1, row, &s1); + PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(&t1); + + /* Add error 
polynomial */ + PQCLEAN_DILITHIUM3_AVX2_poly_add(&t1, &t1, &s2.vec[i]); + + /* Round t and pack t1, t0 */ + PQCLEAN_DILITHIUM3_AVX2_poly_caddq(&t1); + PQCLEAN_DILITHIUM3_AVX2_poly_power2round(&t1, &t0, &t1); + PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(pk + SEEDBYTES + i * POLYT1_PACKEDBYTES, &t1); + PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(sk + 2 * SEEDBYTES + CRHBYTES + (L + K)*POLYETA_PACKEDBYTES + i * POLYT0_PACKEDBYTES, &t0); + } + + /* Compute CRH(rho, t1) and store in secret key */ + crh(sk + 2 * SEEDBYTES, pk, PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES); + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature +* +* Description: Computes signature. +* +* Arguments: - uint8_t *sig: pointer to output signature (of length PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned int i, n, pos; + uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime; + uint8_t hintbuf[N]; + uint8_t *hint = sig + SEEDBYTES + L * POLYZ_PACKEDBYTES; + uint64_t nonce = 0; + polyvecl mat[K], s1, z; + polyveck t0, s2, w1; + poly c, tmp; + union { + polyvecl y; + polyveck w0; + } tmpv; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + CRHBYTES; + mu = key + SEEDBYTES; + rhoprime = mu + CRHBYTES; + PQCLEAN_DILITHIUM3_AVX2_unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, 
CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + crh(rhoprime, key, SEEDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_expand(mat, rho); + PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&s1); + PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(&s2); + PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(&t0); + + +rej: + /* Sample intermediate vector y */ + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1(&z.vec[4], rhoprime, nonce + 4); + nonce += 5; + + /* Matrix-vector product */ + tmpv.y = z; + PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&tmpv.y); + PQCLEAN_DILITHIUM3_AVX2_polyvec_matrix_pointwise_montgomery(&w1, mat, &tmpv.y); + PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + PQCLEAN_DILITHIUM3_AVX2_polyveck_caddq(&w1); + PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose(&w1, &tmpv.w0, &w1); + PQCLEAN_DILITHIUM3_AVX2_polyveck_pack_w1(sig, &w1); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + PQCLEAN_DILITHIUM3_AVX2_poly_challenge(&c, sig); + PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&c); + + /* Compute z, reject if it reveals secret */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(&tmp, &c, &s1.vec[i]); + PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM3_AVX2_poly_add(&z.vec[i], &z.vec[i], &tmp); + PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&z.vec[i]); + if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(&z.vec[i], GAMMA1 - BETA)) { + goto rej; + } + } + + /* Zero hint vector in signature */ + pos = 0; + memset(hint, 0, OMEGA); + + for (i = 0; i < K; i++) { + /* Check that subtracting cs2 does not change high bits of w 
and low bits + * do not reveal secret information */ + PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(&tmp, &c, &s2.vec[i]); + PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM3_AVX2_poly_sub(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&tmpv.w0.vec[i]); + if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(&tmpv.w0.vec[i], GAMMA2 - BETA)) { + goto rej; + } + + /* Compute hints */ + PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(&tmp, &c, &t0.vec[i]); + PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&tmp); + if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(&tmp, GAMMA2)) { + goto rej; + } + + PQCLEAN_DILITHIUM3_AVX2_poly_add(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + n = PQCLEAN_DILITHIUM3_AVX2_poly_make_hint(hintbuf, &tmpv.w0.vec[i], &w1.vec[i]); + if (pos + n > OMEGA) { + goto rej; + } + + /* Store hints in signature */ + memcpy(&hint[pos], hintbuf, n); + hint[OMEGA + i] = pos = pos + n; + } + + /* Pack z into signature */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM3_AVX2_polyz_pack(sig + SEEDBYTES + i * POLYZ_PACKEDBYTES, &z.vec[i]); + } + + *siglen = PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t i; + + for (i = 0; i < mlen; ++i) { /* copy back to front so the message survives when sm == m */ + sm[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + } + PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature(sm, smlen, sm + PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES, mlen, sk); /* signature goes in front of the copied message */ + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - const uint8_t *sig: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) { + unsigned int i, j, pos = 0; + /* PQCLEAN_DILITHIUM3_AVX2_polyw1_pack writes additional 14 bytes */ + ALIGNED_UINT8(K * POLYW1_PACKEDBYTES + 14) buf; + uint8_t mu[CRHBYTES]; + const uint8_t *hint = sig + SEEDBYTES + L * POLYZ_PACKEDBYTES; + polyvecl rowbuf[2]; + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; + shake256incctx state; + + if (siglen != PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) { + return -1; + } + + /* Compute CRH(CRH(rho, t1), msg) */ + crh(mu, pk, 
PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + /* Expand PQCLEAN_DILITHIUM3_AVX2_challenge */ + PQCLEAN_DILITHIUM3_AVX2_poly_challenge(&c, sig); + PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&c); + + /* Unpack z; shortness follows from unpacking */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(&z.vec[i], sig + SEEDBYTES + i * POLYZ_PACKEDBYTES); + PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&z.vec[i]); + } + + + for (i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, pk, i); + + /* Compute i-th row of Az - c2^Dt1 */ + PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_montgomery(&w1, row, &z); + + PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(&h, pk + SEEDBYTES + i * POLYT1_PACKEDBYTES); + PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(&h); + PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&h); + PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_montgomery(&h, &c, &h); + + PQCLEAN_DILITHIUM3_AVX2_poly_sub(&w1, &w1, &h); + PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&w1); + PQCLEAN_DILITHIUM3_AVX2_poly_invntt_tomont(&w1); + + /* Get hint polynomial and reconstruct w1 */ + memset(h.vec, 0, sizeof(poly)); + if (hint[OMEGA + i] < pos || hint[OMEGA + i] > OMEGA) { + return -1; + } + + for (j = pos; j < hint[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > pos && hint[j] <= hint[j - 1]) { + return -1; + } + h.coeffs[hint[j]] = 1; + } + pos = hint[OMEGA + i]; + + PQCLEAN_DILITHIUM3_AVX2_poly_caddq(&w1); + PQCLEAN_DILITHIUM3_AVX2_poly_use_hint(&w1, &w1, &h); + PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(buf.coeffs + i * POLYW1_PACKEDBYTES, &w1); + } + + /* Extra indices are zero for strong unforgeability */ + for (j = pos; j < OMEGA; ++j) { + if (hint[j]) { + return -1; + } + } + + /* Call random oracle and verify 
PQCLEAN_DILITHIUM3_AVX2_challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf.coeffs, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + for (i = 0; i < SEEDBYTES; ++i) { + if (buf.coeffs[i] != sig[i]) { + return -1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open +* +* Description: Verify signed message. +* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk) { + size_t i; + + if (smlen < PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) { + goto badsig; + } + + *mlen = smlen - PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES; + if (PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify(sm, PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES, sm + PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES, *mlen, pk)) { + goto badsig; + } else { + /* All good, copy msg, return 0 */ + for (i = 0; i < *mlen; ++i) { + m[i] = sm[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES + i]; + } + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for (i = 0; i < smlen; ++i) { + m[i] = 0; + } + + return -1; +} diff --git a/crypto_sign/dilithium/dilithium3/avx2/sign.h b/crypto_sign/dilithium/dilithium3/avx2/sign.h new file mode 100644 index 00000000..d42631b3 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/sign.h @@ -0,0 +1,29 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_SIGN_H 
+#define PQCLEAN_DILITHIUM3_AVX2_SIGN_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include +#include + +void PQCLEAN_DILITHIUM3_AVX2_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/avx2/symmetric-shake.c b/crypto_sign/dilithium/dilithium3/avx2/symmetric-shake.c new file mode 100644 index 00000000..1baa0e8f --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/symmetric-shake.c @@ -0,0 +1,26 @@ +#include "fips202.h" +#include "params.h" +#include "symmetric.h" +#include + +void PQCLEAN_DILITHIUM3_AVX2_dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void PQCLEAN_DILITHIUM3_AVX2_dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/crypto_sign/dilithium/dilithium3/avx2/symmetric.h b/crypto_sign/dilithium/dilithium3/avx2/symmetric.h 
new file mode 100644 index 00000000..3d97a2b3 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/avx2/symmetric.h @@ -0,0 +1,36 @@ +#ifndef PQCLEAN_DILITHIUM3_AVX2_SYMMETRIC_H +#define PQCLEAN_DILITHIUM3_AVX2_SYMMETRIC_H +#include "fips202.h" +#include "params.h" +#include + + + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +void PQCLEAN_DILITHIUM3_AVX2_dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +void PQCLEAN_DILITHIUM3_AVX2_dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) +#define stream128_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM3_AVX2_dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE128_RATE), STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM3_AVX2_dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE256_RATE), STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/LICENSE b/crypto_sign/dilithium/dilithium3/clean/LICENSE new file mode 100644 index 00000000..08473af7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/LICENSE @@ -0,0 +1,5 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/crypto_sign/dilithium/dilithium3/clean/Makefile.Microsoft_nmake b/crypto_sign/dilithium/dilithium3/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..79d38690 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/Makefile.Microsoft_nmake @@ -0,0 +1,23 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libdilithium3_clean.lib +OBJECTS=ntt.obj packing.obj poly.obj polyvec.obj reduce.obj rounding.obj sign.obj symmetric-shake.obj + +# Warning C4146 is raised when a unary minus operator is applied to an +# unsigned type; this has nonetheless been standard and portable for as +# long as there has been a C standard, and we need it for constant-time +# computations. Thus, we disable that spurious warning. +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /wd4146 + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/dilithium/dilithium3/clean/api.h b/crypto_sign/dilithium/dilithium3/clean/api.h new file mode 100644 index 00000000..1799052b --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/api.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_API_H +#define PQCLEAN_DILITHIUM3_CLEAN_API_H + +#include +#include + +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES 1952 +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES 4016 +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES 3293 + +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_ALGNAME "Dilithium3" + + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, 
const uint8_t *pk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/ntt.c b/crypto_sign/dilithium/dilithium3/clean/ntt.c new file mode 100644 index 00000000..75c4ecbf --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/ntt.c @@ -0,0 +1,98 @@ +#include "ntt.h" +#include "params.h" +#include "reduce.h" +#include + +static const int32_t zetas[N] = { + 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, + 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, + -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, + -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, + 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, + -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, + -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, + -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, + 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, + 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, + 2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, + 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, + 
900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, + -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, + 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, + 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, + -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, + -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, + -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, + -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, + -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, + -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, + -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, + -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, + -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, + -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 +}; + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_ntt +* +* Description: Forward NTT, in-place. No modular reduction is performed after +* additions or subtractions. Output vector is in bitreversed order. 
+* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_ntt(int32_t a[N]) { + unsigned int len, start, j, k; + int32_t zeta, t; + + k = 0; + for (len = 128; len > 0; len >>= 1) { + for (start = 0; start < N; start = j + len) { + zeta = zetas[++k]; + for (j = start; j < start + len; ++j) { + t = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((int64_t)zeta * a[j + len]); + a[j + len] = a[j] - t; + a[j] = a[j] + t; + } + } + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_invntt_tomont +* +* Description: Inverse NTT and multiplication by Montgomery factor 2^32. +* In-place. No modular reductions after additions or +* subtractions; input coefficients need to be smaller than +* Q in absolute value. Output coefficient are smaller than Q in +* absolute value. +* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_invntt_tomont(int32_t a[N]) { + unsigned int start, len, j, k; + int32_t t, zeta; + const int32_t f = 41978; // mont^2/256 + + k = 256; + for (len = 1; len < N; len <<= 1) { + for (start = 0; start < N; start = j + len) { + zeta = -zetas[--k]; + for (j = start; j < start + len; ++j) { + t = a[j]; + a[j] = t + a[j + len]; + a[j + len] = t - a[j + len]; + a[j + len] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((int64_t)zeta * a[j + len]); + } + } + } + + for (j = 0; j < N; ++j) { + a[j] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((int64_t)f * a[j]); + } +} diff --git a/crypto_sign/dilithium/dilithium3/clean/ntt.h b/crypto_sign/dilithium/dilithium3/clean/ntt.h new file mode 100644 index 00000000..ac07568d --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/ntt.h @@ -0,0 +1,10 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_NTT_H +#define PQCLEAN_DILITHIUM3_CLEAN_NTT_H +#include "params.h" +#include + +void PQCLEAN_DILITHIUM3_CLEAN_ntt(int32_t 
a[N]); + +void PQCLEAN_DILITHIUM3_CLEAN_invntt_tomont(int32_t a[N]); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/packing.c b/crypto_sign/dilithium/dilithium3/clean/packing.c new file mode 100644 index 00000000..ed75c70d --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/packing.c @@ -0,0 +1,261 @@ +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" + + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + pk[i] = rho[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(pk + i * POLYT1_PACKEDBYTES, &t1->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_unpack_pk +* +* Description: Unpack public key pk = (rho, t1). 
+* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const polyveck *t1: pointer to output vector t1 +* - uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = pk[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(&t1->vec[i], pk + i * POLYT1_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_pack_sk +* +* Description: Bit-pack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = rho[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = key[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + sk[i] = tr[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s1->vec[i]); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s2->vec[i]); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; 
++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(sk + i * POLYT0_PACKEDBYTES, &t0->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_unpack_sk +* +* Description: Unpack secret key sk = (rho, tr, key, t0, s1, s2). +* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const uint8_t tr[]: output byte array for tr +* - const uint8_t key[]: output byte array for key +* - const polyveck *t0: pointer to output vector t0 +* - const polyvecl *s1: pointer to output vector s1 +* - const polyveck *s2: pointer to output vector s2 +* - uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + key[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + tr[i] = sk[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(&s1->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(&s2->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(&t0->vec[i], sk + i * POLYT0_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_pack_sig +* +* Description: Bit-pack signature sig = (c, z, h). 
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to PQCLEAN_DILITHIUM3_CLEAN_challenge hash length SEEDBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES], + const uint8_t c[SEEDBYTES], + const polyvecl *z, + const polyveck *h) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + sig[i] = c[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Encode h */ + for (i = 0; i < OMEGA + K; ++i) { + sig[i] = 0; + } + + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + if (h->vec[i].coeffs[j] != 0) { + sig[k++] = (uint8_t) j; + } + } + + sig[OMEGA + i] = (uint8_t) k; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output PQCLEAN_DILITHIUM3_CLEAN_challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0. 
+**************************************************/ +int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig(uint8_t c[SEEDBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES]) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + c[i] = sig[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + h->vec[i].coeffs[j] = 0; + } + + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { + return 1; + } + + for (j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) { + return 1; + } + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (j = k; j < OMEGA; ++j) { + if (sig[j]) { + return 1; + } + } + + return 0; +} diff --git a/crypto_sign/dilithium/dilithium3/clean/packing.h b/crypto_sign/dilithium/dilithium3/clean/packing.h new file mode 100644 index 00000000..734c6f10 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/packing.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_PACKING_H +#define PQCLEAN_DILITHIUM3_CLEAN_PACKING_H +#include "params.h" +#include "polyvec.h" +#include + +void PQCLEAN_DILITHIUM3_CLEAN_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +void PQCLEAN_DILITHIUM3_CLEAN_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); + +void 
PQCLEAN_DILITHIUM3_CLEAN_unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES]); + +void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES]); + +int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES]); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/params.h b/crypto_sign/dilithium/dilithium3/clean/params.h new file mode 100644 index 00000000..76bca543 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/params.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_PARAMS_H +#define PQCLEAN_DILITHIUM3_CLEAN_PARAMS_H + + + +#define SEEDBYTES 32 +#define CRHBYTES 48 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#define K 6 +#define L 5 +#define ETA 4 +#define TAU 49 +#define BETA 196 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 55 +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_ALGNAME "Dilithium3" + + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#define POLYZ_PACKEDBYTES 640 + +#define POLYW1_PACKEDBYTES 128 + +#define POLYETA_PACKEDBYTES 128 + +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + CRHBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES (SEEDBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/poly.c b/crypto_sign/dilithium/dilithium3/clean/poly.c new file mode 100644 index 00000000..7a956575 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/poly.c 
@@ -0,0 +1,818 @@ +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" +#include "rounding.h" +#include "symmetric.h" +#include + +#define DBENCH_START() +#define DBENCH_STOP(t) + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_reduce(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_reduce32(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_caddq(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_caddq(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_freeze +* +* Description: Inplace reduction of all coefficients of polynomial to +* standard representatives. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_freeze(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_freeze(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_add +* +* Description: Add polynomials. No modular reduction is performed. 
+* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_sub +* +* Description: Subtract polynomials. No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtraced from first input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_shiftl(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] <<= D; + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by +* 8*Q in absolute value. 
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_ntt(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_CLEAN_ntt(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_tomont(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM3_CLEAN_invntt_tomont(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a1->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_power2round(&a0->coeffs[i], a->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a1->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_decompose(&a0->coeffs[i], a->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_poly_make_hint +* +* Description: Compute hint polynomial. The coefficients of which indicate +* whether the low bits of the corresponding coefficient of +* the input polynomial overflow into the high bits. +* +* Arguments: - poly *h: pointer to output hint polynomial +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of 1 bits. 
+**************************************************/
+unsigned int PQCLEAN_DILITHIUM3_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1) {
+    unsigned int k;
+    unsigned int ones = 0;
+    DBENCH_START();
+
+    for (k = 0; k < N; ++k) {
+        /* Store the per-coefficient hint, then add it to the running total. */
+        h->coeffs[k] = PQCLEAN_DILITHIUM3_CLEAN_make_hint(a0->coeffs[k], a1->coeffs[k]);
+        ones += h->coeffs[k];
+    }
+
+    DBENCH_STOP(*tround);
+    return ones;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint
+*
+* Description: Use hint polynomial to correct the high bits of a polynomial.
+*
+* Arguments:   - poly *b: pointer to output polynomial with corrected high bits
+*              - const poly *a: pointer to input polynomial
+*              - const poly *h: pointer to input hint polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(poly *b, const poly *a, const poly *h) {
+    unsigned int k;
+    DBENCH_START();
+
+    /* Coefficient-wise application of the scalar use_hint routine. */
+    for (k = 0; k < N; ++k) {
+        b->coeffs[k] = PQCLEAN_DILITHIUM3_CLEAN_use_hint(a->coeffs[k], h->coeffs[k]);
+    }
+
+    DBENCH_STOP(*tround);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm
+*
+* Description: Check infinity norm of polynomial against given bound.
+*              Assumes input coefficients were reduced by
+*              PQCLEAN_DILITHIUM3_CLEAN_reduce32().
+*
+* Arguments:   - const poly *a: pointer to polynomial
+*              - int32_t B: norm bound
+*
+* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, int32_t B) {
+    unsigned int k;
+    int32_t c, mag;
+    DBENCH_START();
+
+    /* A bound above (Q-1)/8 is reported as a violation without looking at a. */
+    if (B > (Q - 1) / 8) {
+        return 1;
+    }
+
+    /* It is ok to leak which coefficient violates the bound since
+       the probability for each coefficient is independent of secret
+       data but we must not leak the sign of the centralized representative. */
+    for (k = 0; k < N; ++k) {
+        c = a->coeffs[k];
+
+        /* Absolute value without branching: the shifted sign is used as
+           a mask that decides whether 2*c gets subtracted. */
+        mag = c >> 31;
+        mag = c - (mag & 2 * c);
+
+        if (mag >= B) {
+            DBENCH_STOP(*tsample);
+            return 1;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return 0;
+}
+
+/*************************************************
+* Name:        rej_uniform
+*
+* Description: Sample uniformly random coefficients in [0, Q-1] by
+*              performing rejection sampling on array of random bytes.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_uniform(int32_t *a,
+                                unsigned int len,
+                                const uint8_t *buf,
+                                unsigned int buflen) {
+    unsigned int accepted = 0;
+    unsigned int rd;
+    uint32_t cand;
+    DBENCH_START();
+
+    /* Consume the buffer three bytes at a time while output space remains. */
+    for (rd = 0; accepted < len && rd + 3 <= buflen; rd += 3) {
+        /* Little-endian 24-bit load, then keep only the low 23 bits. */
+        cand = buf[rd];
+        cand |= (uint32_t)buf[rd + 1] << 8;
+        cand |= (uint32_t)buf[rd + 2] << 16;
+        cand &= 0x7FFFFF;
+
+        /* Candidates that are not below Q are discarded. */
+        if (cand < Q) {
+            a[accepted++] = cand;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return accepted;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_poly_uniform
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [0,Q-1] by performing rejection sampling on the
+*              output stream of SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#define POLY_UNIFORM_NBLOCKS ((768 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES)
+void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce) {
+    unsigned int j, filled, carry;
+    unsigned int avail = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES;
+    /* Two extra bytes: up to (avail % 3) leftover bytes are kept in front
+       of a freshly squeezed block during a refill. */
+    uint8_t buf[POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES + 2];
+    stream128_state state;
+
+    stream128_init(&state, seed, nonce);
+    stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
+
+    filled = rej_uniform(a->coeffs, N, buf, avail);
+
+    /* Refill one block at a time until all N coefficients are sampled. */
+    while (filled < N) {
+        /* Bytes that did not make up a whole 3-byte group move to the front. */
+        carry = avail % 3;
+        for (j = 0; j < carry; ++j) {
+            buf[j] = buf[avail - carry + j];
+        }
+
+        stream128_squeezeblocks(buf + carry, 1, &state);
+        avail = STREAM128_BLOCKBYTES + carry;
+        filled += rej_uniform(a->coeffs + filled, N - filled, buf, avail);
+    }
+    stream128_release(&state);
+}
+
+/*************************************************
+* Name:        rej_eta
+*
+* Description: Sample uniformly random coefficients in [-ETA, ETA] by
+*              performing rejection sampling on array of random bytes.
+*
+* Arguments:   - int32_t *a: pointer to output array (allocated)
+*              - unsigned int len: number of coefficients to be sampled
+*              - const uint8_t *buf: array of random bytes
+*              - unsigned int buflen: length of array of random bytes
+*
+* Returns number of sampled coefficients. Can be smaller than len if not enough
+* random bytes were given.
+**************************************************/
+static unsigned int rej_eta(int32_t *a,
+                            unsigned int len,
+                            const uint8_t *buf,
+                            unsigned int buflen) {
+    unsigned int accepted = 0;
+    unsigned int rd;
+    uint32_t lo, hi;
+    DBENCH_START();
+
+    /* Each byte yields two 4-bit candidates: low nibble first, then high. */
+    for (rd = 0; accepted < len && rd < buflen; ++rd) {
+        lo = buf[rd] & 0x0F;
+        hi = buf[rd] >> 4;
+
+        /* A nibble is accepted when it is below 9 and stored as 4 - nibble. */
+        if (lo < 9) {
+            a[accepted++] = 4 - lo;
+        }
+        /* The second candidate is additionally guarded by the output length. */
+        if (hi < 9 && accepted < len) {
+            a[accepted++] = 4 - hi;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return accepted;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-ETA,ETA] by performing rejection sampling on the
+*              output stream from SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#define POLY_UNIFORM_ETA_NBLOCKS ((227 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES)
+void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce) {
+    unsigned int filled;
+    unsigned int avail = POLY_UNIFORM_ETA_NBLOCKS * STREAM128_BLOCKBYTES;
+    uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS * STREAM128_BLOCKBYTES];
+    stream128_state state;
+
+    stream128_init(&state, seed, nonce);
+    stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
+
+    filled = rej_eta(a->coeffs, N, buf, avail);
+
+    /* Squeeze one more block per round until N coefficients are available. */
+    while (filled < N) {
+        stream128_squeezeblocks(buf, 1, &state);
+        filled += rej_eta(a->coeffs + filled, N - filled, buf, STREAM128_BLOCKBYTES);
+    }
+    stream128_release(&state);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream
+*              of SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 16-bit nonce
+**************************************************/
+#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES)
+void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1(poly *a,
+        const uint8_t seed[CRHBYTES],
+        uint16_t nonce) {
+    stream256_state state;
+    uint8_t buf[POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES];
+
+    /* No rejection step: the squeezed bytes are unpacked directly. */
+    stream256_init(&state, seed, nonce);
+    stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
+    stream256_release(&state);
+    PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(a, buf);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_poly_challenge
+*
+* Description: Implementation of H. Samples polynomial with TAU nonzero
+*              coefficients in {-1,1} using the output stream of
+*              SHAKE256(seed).
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const uint8_t seed[]: byte array containing seed of length SEEDBYTES
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) {
+    unsigned int k, j, rd;
+    uint64_t signbits;
+    uint8_t buf[SHAKE256_RATE];
+    shake256incctx state;
+
+    shake256_inc_init(&state);
+    shake256_inc_absorb(&state, seed, SEEDBYTES);
+    shake256_inc_finalize(&state);
+    shake256_inc_squeeze(buf, sizeof buf, &state);
+
+    /* The first eight bytes form a little-endian pool of sign bits. */
+    signbits = 0;
+    for (k = 0; k < 8; ++k) {
+        signbits |= (uint64_t)buf[k] << 8 * k;
+    }
+    rd = 8;
+
+    for (k = 0; k < N; ++k) {
+        c->coeffs[k] = 0;
+    }
+
+    for (k = N - TAU; k < N; ++k) {
+        /* Draw bytes until one is at most k, squeezing more output
+           whenever the buffer is used up. */
+        do {
+            if (rd >= SHAKE256_RATE) {
+                shake256_inc_squeeze(buf, sizeof buf, &state);
+                rd = 0;
+            }
+
+            j = buf[rd++];
+        } while (j > k);
+
+        /* Move the entry at j to position k, then put +1 or -1 at j
+           depending on the next sign bit. */
+        c->coeffs[k] = c->coeffs[j];
+        c->coeffs[j] = 1 - 2 * (signbits & 1);
+        signbits >>= 1;
+    }
+    shake256_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack
+*
+* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYETA_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(uint8_t *r, const poly *a) {
+    unsigned int k;
+    uint8_t even, odd;
+    DBENCH_START();
+
+    /* Two coefficients per byte: even index in the low nibble, odd in the high. */
+    for (k = 0; k < N / 2; ++k) {
+        even = (uint8_t) (ETA - a->coeffs[2 * k + 0]);
+        odd = (uint8_t) (ETA - a->coeffs[2 * k + 1]);
+        r[k] = even | (odd << 4);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack
+*
+* Description: Unpack polynomial with coefficients in [-ETA,ETA].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(poly *r, const uint8_t *a) {
+    unsigned int k;
+    DBENCH_START();
+
+    /* Inverse of the packing: split the byte into nibbles and map x -> ETA - x. */
+    for (k = 0; k < N / 2; ++k) {
+        r->coeffs[2 * k + 0] = ETA - (a[k] & 0x0F);
+        r->coeffs[2 * k + 1] = ETA - (a[k] >> 4);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack
+*
+* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits.
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(uint8_t *r, const poly *a) {
+    unsigned int g;
+    DBENCH_START();
+
+    /* Each group of four 10-bit coefficients fills five output bytes. */
+    for (g = 0; g < N / 4; ++g) {
+        const int32_t *c = &a->coeffs[4 * g];
+        uint8_t *out = &r[5 * g];
+
+        out[0] = (uint8_t) (c[0] >> 0);
+        out[1] = (uint8_t) ((c[0] >> 8) | (c[1] << 2));
+        out[2] = (uint8_t) ((c[1] >> 6) | (c[2] << 4));
+        out[3] = (uint8_t) ((c[2] >> 4) | (c[3] << 6));
+        out[4] = (uint8_t) (c[3] >> 2);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack
+*
+* Description: Unpack polynomial t1 with 10-bit coefficients.
+*              Output coefficients are standard representatives.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const uint8_t *a) {
+    unsigned int g;
+    DBENCH_START();
+
+    /* Five input bytes expand to four coefficients, each masked to 10 bits. */
+    for (g = 0; g < N / 4; ++g) {
+        const uint8_t *in = &a[5 * g];
+        int32_t *c = &r->coeffs[4 * g];
+
+        c[0] = ((in[0] >> 0) | ((uint32_t)in[1] << 8)) & 0x3FF;
+        c[1] = ((in[1] >> 2) | ((uint32_t)in[2] << 6)) & 0x3FF;
+        c[2] = ((in[2] >> 4) | ((uint32_t)in[3] << 4)) & 0x3FF;
+        c[3] = ((in[3] >> 6) | ((uint32_t)in[4] << 2)) & 0x3FF;
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack
+*
+* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYT0_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(uint8_t *r, const poly *a) {
+    unsigned int g, j;
+    uint32_t t[8];
+    DBENCH_START();
+
+    /* Eight coefficients are packed into thirteen bytes per iteration. */
+    for (g = 0; g < N / 8; ++g) {
+        uint8_t *out = &r[13 * g];
+
+        /* Map each coefficient x to 2^{D-1} - x before packing. */
+        for (j = 0; j < 8; ++j) {
+            t[j] = (1 << (D - 1)) - a->coeffs[8 * g + j];
+        }
+
+        out[0] = (uint8_t) t[0];
+        out[1] = (uint8_t) ((t[0] >> 8) | (t[1] << 5));
+        out[2] = (uint8_t) (t[1] >> 3);
+        out[3] = (uint8_t) ((t[1] >> 11) | (t[2] << 2));
+        out[4] = (uint8_t) ((t[2] >> 6) | (t[3] << 7));
+        out[5] = (uint8_t) (t[3] >> 1);
+        out[6] = (uint8_t) ((t[3] >> 9) | (t[4] << 4));
+        out[7] = (uint8_t) (t[4] >> 4);
+        out[8] = (uint8_t) ((t[4] >> 12) | (t[5] << 1));
+        out[9] = (uint8_t) ((t[5] >> 7) | (t[6] << 6));
+        out[10] = (uint8_t) (t[6] >> 2);
+        out[11] = (uint8_t) ((t[6] >> 10) | (t[7] << 3));
+        out[12] = (uint8_t) (t[7] >> 5);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack
+*
+* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const uint8_t *a) {
+    unsigned int g, j;
+    DBENCH_START();
+
+    /* Thirteen input bytes expand to eight 13-bit fields per iteration. */
+    for (g = 0; g < N / 8; ++g) {
+        const uint8_t *in = &a[13 * g];
+        int32_t *c = &r->coeffs[8 * g];
+
+        c[0] = (in[0] | ((uint32_t)in[1] << 8)) & 0x1FFF;
+        c[1] = ((in[1] >> 5) | ((uint32_t)in[2] << 3) | ((uint32_t)in[3] << 11)) & 0x1FFF;
+        c[2] = ((in[3] >> 2) | ((uint32_t)in[4] << 6)) & 0x1FFF;
+        c[3] = ((in[4] >> 7) | ((uint32_t)in[5] << 1) | ((uint32_t)in[6] << 9)) & 0x1FFF;
+        c[4] = ((in[6] >> 4) | ((uint32_t)in[7] << 4) | ((uint32_t)in[8] << 12)) & 0x1FFF;
+        c[5] = ((in[8] >> 1) | ((uint32_t)in[9] << 7)) & 0x1FFF;
+        c[6] = ((in[9] >> 6) | ((uint32_t)in[10] << 2) | ((uint32_t)in[11] << 10)) & 0x1FFF;
+        c[7] = ((in[11] >> 3) | ((uint32_t)in[12] << 5)) & 0x1FFF;
+
+        /* Undo the packing offset: field x becomes 2^{D-1} - x. */
+        for (j = 0; j < 8; ++j) {
+            c[j] = (1 << (D - 1)) - c[j];
+        }
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyz_pack
+*
+* Description: Bit-pack polynomial with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYZ_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(uint8_t *r, const poly *a) {
+    unsigned int g;
+    uint32_t even, odd;
+    DBENCH_START();
+
+    /* Two coefficients, each mapped to GAMMA1 - x, share five output bytes. */
+    for (g = 0; g < N / 2; ++g) {
+        uint8_t *out = &r[5 * g];
+
+        even = GAMMA1 - a->coeffs[2 * g + 0];
+        odd = GAMMA1 - a->coeffs[2 * g + 1];
+
+        out[0] = (uint8_t) even;
+        out[1] = (uint8_t) (even >> 8);
+        out[2] = (uint8_t) ((even >> 16) | (odd << 4));
+        out[3] = (uint8_t) (odd >> 4);
+        out[4] = (uint8_t) (odd >> 12);
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack
+*
+* Description: Unpack polynomial z with coefficients
+*              in [-(GAMMA1 - 1), GAMMA1].
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const uint8_t *a) {
+    unsigned int i;
+    DBENCH_START();
+
+    /* Five input bytes carry two 20-bit fields. */
+    for (i = 0; i < N / 2; ++i) {
+        /* Even coefficient: bytes 0, 1 and the low nibble of byte 2. */
+        r->coeffs[2 * i + 0]  = a[5 * i + 0];
+        r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 1] << 8;
+        r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 2] << 16;
+        r->coeffs[2 * i + 0] &= 0xFFFFF;
+
+        /* Odd coefficient: high nibble of byte 2, then bytes 3 and 4.
+           The mask now targets this coefficient; previously the even
+           coefficient was masked a second time. The three terms already
+           occupy only bits 0..19, so the unpacked values are unchanged. */
+        r->coeffs[2 * i + 1]  = a[5 * i + 2] >> 4;
+        r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 3] << 4;
+        r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 4] << 12;
+        r->coeffs[2 * i + 1] &= 0xFFFFF;
+
+        /* Undo the packing offset: field x becomes GAMMA1 - x. */
+        r->coeffs[2 * i + 0] = GAMMA1 - r->coeffs[2 * i + 0];
+        r->coeffs[2 * i + 1] = GAMMA1 - r->coeffs[2 * i + 1];
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43].
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYW1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(uint8_t *r, const poly *a) {
+    unsigned int i;
+    DBENCH_START();
+
+    /* Two coefficients per byte: even index in the low nibble, odd in the high. */
+    for (i = 0; i < N / 2; ++i) {
+        r[i] = (uint8_t) (a->coeffs[2 * i + 0] | (a->coeffs[2 * i + 1] << 4));
+    }
+
+    DBENCH_STOP(*tpack);
+}
diff --git a/crypto_sign/dilithium/dilithium3/clean/poly.h b/crypto_sign/dilithium/dilithium3/clean/poly.h
new file mode 100644
index 00000000..d5531e26
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium3/clean/poly.h
@@ -0,0 +1,53 @@
+#ifndef PQCLEAN_DILITHIUM3_CLEAN_POLY_H
+#define PQCLEAN_DILITHIUM3_CLEAN_POLY_H
+#include "params.h"
+#include <stdint.h>
+
+typedef struct {
+    int32_t coeffs[N];
+} poly;
+
+void PQCLEAN_DILITHIUM3_CLEAN_poly_reduce(poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_caddq(poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_freeze(poly *a);
+
+void PQCLEAN_DILITHIUM3_CLEAN_poly_add(poly *c, const poly *a, const poly *b);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_sub(poly *c, const poly *a, const poly *b);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_shiftl(poly *a);
+
+void PQCLEAN_DILITHIUM3_CLEAN_poly_ntt(poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_tomont(poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b);
+
+void PQCLEAN_DILITHIUM3_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a);
+unsigned int PQCLEAN_DILITHIUM3_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(poly *b, const poly *a, const poly *h);
+
+int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, int32_t B);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1(poly *a,
+        const uint8_t seed[CRHBYTES],
+        uint16_t nonce);
+void PQCLEAN_DILITHIUM3_CLEAN_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]);
+
+void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(uint8_t *r, const poly *a);
+
+#endif
diff --git a/crypto_sign/dilithium/dilithium3/clean/polyvec.c b/crypto_sign/dilithium/dilithium3/clean/polyvec.c
new file mode 100644
index 00000000..67212779
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium3/clean/polyvec.c
@@ -0,0 +1,448 @@
+#include "params.h"
+#include "poly.h"
+#include "polyvec.h"
+#include <stdint.h>
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_expand
+*
+* Description: Implementation of ExpandA. Generates matrix A with uniformly
+*              random coefficients a_{i,j} by performing rejection
+*              sampling on the output stream of SHAKE128(rho|j|i)
+*              or AES256CTR(rho,j|i).
+*
+* Arguments:   - polyvecl mat[K]: output matrix
+*              - const uint8_t rho[]: byte array containing seed rho
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) {
+    unsigned int row, col;
+
+    for (row = 0; row < K; ++row) {
+        for (col = 0; col < L; ++col) {
+            /* The nonce carries the row index shifted left by 8 plus the column index. */
+            uint16_t nonce = (uint16_t) ((row << 8) + col);
+            PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(&mat[row].vec[col], rho, nonce);
+        }
+    }
+}
+
+/* Computes t->vec[i] from row mat[i] and v for each of the K rows, using
+   PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_montgomery. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) {
+    unsigned int row;
+
+    for (row = 0; row < K; ++row) {
+        PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_montgomery(&t->vec[row], &mat[row], v);
+    }
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length L **************/
+/**************************************************************/
+
+/* Samples the L polynomials of v with poly_uniform_eta, giving entry i
+   the 16-bit nonce value nonce + i. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    unsigned int idx;
+
+    for (idx = 0; idx < L; ++idx) {
+        PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(&v->vec[idx], seed, (uint16_t) (nonce + idx));
+    }
+}
+
+/* Samples the L polynomials of v with poly_uniform_gamma1, giving entry i
+   the 16-bit nonce value L * nonce + i. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) {
+    unsigned int idx;
+
+    for (idx = 0; idx < L; ++idx) {
+        uint16_t entry_nonce = (uint16_t) (L * nonce + idx);
+        PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1(&v->vec[idx], seed, entry_nonce);
+    }
+}
+
+/* Applies PQCLEAN_DILITHIUM3_CLEAN_poly_reduce to each of the L polynomials of v. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_reduce(polyvecl *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < L; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_reduce(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyvecl_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length L
+*              to standard representatives.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_freeze(polyvecl *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < L; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_freeze(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyvecl_add
+*
+* Description: Add vectors of polynomials of length L.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyvecl *w: pointer to output vector
+*              - const polyvecl *u: pointer to first summand
+*              - const polyvecl *v: pointer to second summand
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < L; ++idx) {
+        poly *sum = &w->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_add(sum, &u->vec[idx], &v->vec[idx]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length L. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyvecl *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(polyvecl *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < L; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_ntt(p);
+    }
+}
+
+/* Applies PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_tomont to each of the L polynomials of v. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_invntt_tomont(polyvecl *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < L; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_tomont(p);
+    }
+}
+
+/* Sets r->vec[i] to the poly_pointwise_montgomery product of a and v->vec[i]
+   for each of the L entries. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < L; ++idx) {
+        poly *dst = &r->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery(dst, a, &v->vec[idx]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_montgomery
+*
+* Description: Pointwise multiply vectors of polynomials of length L, multiply
+*              resulting vector by 2^{-32} and add (accumulate) polynomials
+*              in it. Input/output vectors are in NTT domain representation.
+*
+* Arguments:   - poly *w: output polynomial
+*              - const polyvecl *u: pointer to first input vector
+*              - const polyvecl *v: pointer to second input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_montgomery(poly *w,
+        const polyvecl *u,
+        const polyvecl *v) {
+    unsigned int idx;
+    poly product;
+
+    /* The first product is written straight into w; the remaining ones
+       pass through a temporary and are added to w. */
+    PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]);
+    for (idx = 1; idx < L; ++idx) {
+        PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery(&product, &u->vec[idx], &v->vec[idx]);
+        PQCLEAN_DILITHIUM3_CLEAN_poly_add(w, w, &product);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length L.
+*              Assumes input polyvecl to be reduced by PQCLEAN_DILITHIUM3_CLEAN_polyvecl_reduce().
+*
+* Arguments:   - const polyvecl *v: pointer to vector
+*              - int32_t bound: norm bound B
+*
+* Returns 0 if norm of all polynomials is strictly smaller than B <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(const polyvecl *v, int32_t bound) {
+    unsigned int idx;
+
+    /* Stop at the first polynomial that fails the check. */
+    for (idx = 0; idx < L; ++idx) {
+        const poly *p = &v->vec[idx];
+        if (PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(p, bound)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length K **************/
+/**************************************************************/
+
+/* Samples the K polynomials of v with poly_uniform_eta, giving entry i
+   the 16-bit nonce value nonce + i. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    unsigned int idx;
+
+    for (idx = 0; idx < K; ++idx) {
+        PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(&v->vec[idx], seed, (uint16_t) (nonce + idx));
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to representatives in [-6283009,6283007].
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(polyveck *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < K; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_reduce(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_caddq
+*
+* Description: For all coefficients of polynomials in vector of length K
+*              add Q if coefficient is negative.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_caddq(polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_caddq(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_freeze
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to standard representatives.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_freeze(polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_freeze(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_add
+*
+* Description: Add vectors of polynomials of length K.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first summand
+*              - const polyveck *v: pointer to second summand
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *sum = &w->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_add(sum, &u->vec[idx], &v->vec[idx]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub
+*
+* Description: Subtract vectors of polynomials of length K.
+*              No modular reduction is performed.
+*
+* Arguments:   - polyveck *w: pointer to output vector
+*              - const polyveck *u: pointer to first input vector
+*              - const polyveck *v: pointer to second input vector to be
+*                                   subtracted from first input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *diff = &w->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_sub(diff, &u->vec[idx], &v->vec[idx]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_shiftl
+*
+* Description: Multiply vector of polynomials of Length K by 2^D without modular
+*              reduction. Assumes input coefficients to be less than 2^{31-D}.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_shiftl(polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_shiftl(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt
+*
+* Description: Forward NTT of all polynomials in vector of length K. Output
+*              coefficients can be up to 16*Q larger than input coefficients.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt(polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_ntt(p);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont
+*
+* Description: Inverse NTT and multiplication by 2^{32} of polynomials
+*              in vector of length K. Input coefficients need to be less
+*              than 2*Q.
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(polyveck *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < K; ++idx) {
+        poly *p = &v->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_tomont(p);
+    }
+}
+
+/* Sets r->vec[i] to the poly_pointwise_montgomery product of a and v->vec[i]
+   for each of the K entries. */
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) {
+    unsigned int idx;
+
+    for (idx = 0; idx < K; ++idx) {
+        poly *dst = &r->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_montgomery(dst, a, &v->vec[idx]);
+    }
+}
+
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length K.
+*              Assumes input polyveck to be reduced by PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce().
+*
+* Arguments:   - const polyveck *v: pointer to vector
+*              - int32_t bound: norm bound B
+*
+* Returns 0 if norm of all polynomials are strictly smaller than B <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm(const polyveck *v, int32_t bound) {
+    unsigned int idx;
+
+    /* Stop at the first polynomial that fails the check. */
+    for (idx = 0; idx < K; ++idx) {
+        const poly *p = &v->vec[idx];
+        if (PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(p, bound)) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_power2round
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute a0, a1 such that a mod^+ Q = a1*2^D + a0
+*              with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be
+*              standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *high = &v1->vec[idx];
+        poly *low = &v0->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_power2round(high, low, &v->vec[idx]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute high and low bits a1, a0 such that
+*              a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2,
+*              except when a1 = (Q-1)/ALPHA, where we set a1 = 0 and
+*              -ALPHA/2 <= a0 = a mod Q - Q < 0.
+*              Assumes coefficients to be standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) {
+    unsigned int idx;
+
+    /* Entry-wise call of the polynomial routine. */
+    for (idx = 0; idx < K; ++idx) {
+        poly *high = &v1->vec[idx];
+        poly *low = &v0->vec[idx];
+        PQCLEAN_DILITHIUM3_CLEAN_poly_decompose(high, low, &v->vec[idx]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint
+*
+* Description: Compute hint vector.
+*
+* Arguments:   - polyveck *h: pointer to output vector
+*              - const polyveck *v0: pointer to low part of input vector
+*              - const polyveck *v1: pointer to high part of input vector
+*
+* Returns number of 1 bits.
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1) { + unsigned int i, s = 0; + + for (i = 0; i < K; ++i) { + s += PQCLEAN_DILITHIUM3_CLEAN_poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); + } + + return s; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); + } +} + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(&r[i * POLYW1_PACKEDBYTES], &w1->vec[i]); + } +} diff --git a/crypto_sign/dilithium/dilithium3/clean/polyvec.h b/crypto_sign/dilithium/dilithium3/clean/polyvec.h new file mode 100644 index 00000000..be184a7b --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/polyvec.h @@ -0,0 +1,68 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_POLYVEC_H +#define PQCLEAN_DILITHIUM3_CLEAN_POLYVEC_H +#include "params.h" +#include "poly.h" +#include + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +void 
PQCLEAN_DILITHIUM3_CLEAN_polyvecl_reduce(polyvecl *v); + +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_freeze(polyvecl *v); + +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(polyvecl *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_invntt_tomont(polyvecl *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + + +int PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(const polyvecl *v, int32_t B); + + + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_caddq(polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_freeze(polyveck *v); + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_shiftl(polyveck *v); + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt(polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +int PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm(const polyveck *v, int32_t B); + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +unsigned int PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1); +void 
PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); + +void PQCLEAN_DILITHIUM3_CLEAN_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1); + +void PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +void PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/reduce.c b/crypto_sign/dilithium/dilithium3/clean/reduce.c new file mode 100644 index 00000000..ded18523 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/reduce.c @@ -0,0 +1,69 @@ +#include "params.h" +#include "reduce.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce +* +* Description: For finite field element a with -2^{31}Q <= a <= Q*2^31, +* compute r \equiv a*2^{-32} (mod Q) such that -Q < r < Q. +* +* Arguments: - int64_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce(int64_t a) { + int32_t t; + + t = (int32_t)((uint64_t)a * (uint64_t)QINV); + t = (a - (int64_t)t * Q) >> 32; + return t; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_reduce32 +* +* Description: For finite field element a with a <= 2^{31} - 2^{22} - 1, +* compute r \equiv a (mod Q) such that -6283009 <= r <= 6283007. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_reduce32(int32_t a) { + int32_t t; + + t = (a + (1 << 22)) >> 23; + t = a - t * Q; + return t; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_caddq +* +* Description: Add Q if input coefficient is negative. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_caddq(int32_t a) { + a += (a >> 31) & Q; + return a; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_freeze +* +* Description: For finite field element a, compute standard +* representative r = a mod^+ Q. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_freeze(int32_t a) { + a = PQCLEAN_DILITHIUM3_CLEAN_reduce32(a); + a = PQCLEAN_DILITHIUM3_CLEAN_caddq(a); + return a; +} diff --git a/crypto_sign/dilithium/dilithium3/clean/reduce.h b/crypto_sign/dilithium/dilithium3/clean/reduce.h new file mode 100644 index 00000000..e3e3ed36 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/reduce.h @@ -0,0 +1,17 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_REDUCE_H +#define PQCLEAN_DILITHIUM3_CLEAN_REDUCE_H +#include "params.h" +#include + +#define MONT (-4186625) // 2^32 % Q +#define QINV 58728449 // q^(-1) mod 2^32 + +int32_t PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce(int64_t a); + +int32_t PQCLEAN_DILITHIUM3_CLEAN_reduce32(int32_t a); + +int32_t PQCLEAN_DILITHIUM3_CLEAN_caddq(int32_t a); + +int32_t PQCLEAN_DILITHIUM3_CLEAN_freeze(int32_t a); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/rounding.c b/crypto_sign/dilithium/dilithium3/clean/rounding.c new file mode 100644 index 00000000..f0181477 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/rounding.c @@ -0,0 +1,92 @@ +#include "params.h" +#include "rounding.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_power2round +* +* Description: For finite field element a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be standard representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_power2round(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + (1 << (D - 1)) - 1) >> D; + *a0 = a - (a1 << D); + return a1; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_decompose +* +* Description: For finite field element a, compute high and low bits a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. Assumes a to be standard +* representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. +**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_decompose(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + 127) >> 7; + a1 = (a1 * 1025 + (1 << 21)) >> 22; + a1 &= 15; + + *a0 = a - a1 * 2 * GAMMA2; + *a0 -= (((Q - 1) / 2 - *a0) >> 31) & Q; + return a1; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_make_hint +* +* Description: Compute hint bit indicating whether the low bits of the +* input element overflow into the high bits. +* +* Arguments: - int32_t a0: low bits of input element +* - int32_t a1: high bits of input element +* +* Returns 1 if overflow. +**************************************************/ +unsigned int PQCLEAN_DILITHIUM3_CLEAN_make_hint(int32_t a0, int32_t a1) { + if (a0 > GAMMA2 || a0 < -GAMMA2 || (a0 == -GAMMA2 && a1 != 0)) { + return 1; + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_use_hint +* +* Description: Correct high bits according to hint. +* +* Arguments: - int32_t a: input element +* - unsigned int hint: hint bit +* +* Returns corrected high bits. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM3_CLEAN_use_hint(int32_t a, unsigned int hint) { + int32_t a0, a1; + + a1 = PQCLEAN_DILITHIUM3_CLEAN_decompose(&a0, a); + if (hint == 0) { + return a1; + } + + if (a0 > 0) { + return (a1 + 1) & 15; + } + return (a1 - 1) & 15; +} diff --git a/crypto_sign/dilithium/dilithium3/clean/rounding.h b/crypto_sign/dilithium/dilithium3/clean/rounding.h new file mode 100644 index 00000000..0cc52cc0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/rounding.h @@ -0,0 +1,14 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_ROUNDING_H +#define PQCLEAN_DILITHIUM3_CLEAN_ROUNDING_H +#include "params.h" +#include + +int32_t PQCLEAN_DILITHIUM3_CLEAN_power2round(int32_t *a0, int32_t a); + +int32_t PQCLEAN_DILITHIUM3_CLEAN_decompose(int32_t *a0, int32_t a); + +unsigned int PQCLEAN_DILITHIUM3_CLEAN_make_hint(int32_t a0, int32_t a1); + +int32_t PQCLEAN_DILITHIUM3_CLEAN_use_hint(int32_t a, unsigned int hint); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/sign.c b/crypto_sign/dilithium/dilithium3/clean/sign.c new file mode 100644 index 00000000..a585a509 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/sign.c @@ -0,0 +1,343 @@ +#include "fips202.h" +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "sign.h" +#include "symmetric.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + uint8_t seedbuf[3 * SEEDBYTES]; + uint8_t tr[CRHBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl mat[K]; + polyvecl s1, s1hat; + polyveck s2, t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 3 * SEEDBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = seedbuf + SEEDBYTES; + key = seedbuf + 2 * SEEDBYTES; + + /* Expand matrix */ + PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_expand(mat, rho); + + /* Sample short vectors s1 and s2 */ + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_uniform_eta(&s1, rhoprime, 0); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_uniform_eta(&s2, rhoprime, L); + + /* Matrix-vector multiplication */ + s1hat = s1; + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(&s1hat); + PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_pointwise_montgomery(&t1, mat, &s1hat); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(&t1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(&t1); + + /* Add error vector s2 */ + PQCLEAN_DILITHIUM3_CLEAN_polyveck_add(&t1, &t1, &s2); + + /* Extract t1 and write public key */ + PQCLEAN_DILITHIUM3_CLEAN_polyveck_caddq(&t1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_power2round(&t1, &t0, &t1); + PQCLEAN_DILITHIUM3_CLEAN_pack_pk(pk, rho, &t1); + + /* Compute CRH(rho, t1) and write secret key */ + crh(tr, pk, PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES); + PQCLEAN_DILITHIUM3_CLEAN_pack_sk(sk, rho, tr, key, &t0, &s1, &s2); + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_signature +* +* Description: Computes signature. 
+* +* Arguments: - uint8_t *sig: pointer to output signature (of length PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_signature(uint8_t *sig, + size_t *siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) { + unsigned int n; + uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime; + uint16_t nonce = 0; + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + CRHBYTES; + mu = key + SEEDBYTES; + rhoprime = mu + CRHBYTES; + PQCLEAN_DILITHIUM3_CLEAN_unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + crh(rhoprime, key, SEEDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_expand(mat, rho); + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(&s1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt(&s2); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt(&t0); + +rej: + /* Sample intermediate vector y */ + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_uniform_gamma1(&y, rhoprime, nonce++); + + /* Matrix-vector multiplication */ + z = y; + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(&z); + PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(&w1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + PQCLEAN_DILITHIUM3_CLEAN_polyveck_caddq(&w1); + 
PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose(&w1, &w0, &w1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_pack_w1(sig, &w1); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + PQCLEAN_DILITHIUM3_CLEAN_poly_challenge(&cp, sig); + PQCLEAN_DILITHIUM3_CLEAN_poly_ntt(&cp); + + /* Compute z, reject if it reveals secret */ + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_poly_montgomery(&z, &cp, &s1); + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_invntt_tomont(&z); + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_add(&z, &z, &y); + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_reduce(&z); + if (PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(&z, GAMMA1 - BETA)) { + goto rej; + } + + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + PQCLEAN_DILITHIUM3_CLEAN_polyveck_pointwise_poly_montgomery(&h, &cp, &s2); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(&h); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub(&w0, &w0, &h); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(&w0); + if (PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm(&w0, GAMMA2 - BETA)) { + goto rej; + } + + /* Compute hints for w1 */ + PQCLEAN_DILITHIUM3_CLEAN_polyveck_pointwise_poly_montgomery(&h, &cp, &t0); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(&h); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(&h); + if (PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm(&h, GAMMA2)) { + goto rej; + } + + PQCLEAN_DILITHIUM3_CLEAN_polyveck_add(&w0, &w0, &h); + n = PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint(&h, &w0, &w1); + if (n > OMEGA) { + goto rej; + } + + /* Write signature */ + PQCLEAN_DILITHIUM3_CLEAN_pack_sig(sig, sig, &z, &h); + *siglen = PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_crypto_sign +* +* Description: Compute signed 
message. +* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign(uint8_t *sm, + size_t *smlen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) { + size_t i; + + for (i = 0; i < mlen; ++i) { + sm[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + } + PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_signature(sm, smlen, sm + PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_verify +* +* Description: Verifies signature. 
+* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_verify(const uint8_t *sig, + size_t siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *pk) { + unsigned int i; + uint8_t buf[K * POLYW1_PACKEDBYTES]; + uint8_t rho[SEEDBYTES]; + uint8_t mu[CRHBYTES]; + uint8_t c[SEEDBYTES]; + uint8_t c2[SEEDBYTES]; + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; + shake256incctx state; + + if (siglen != PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES) { + return -1; + } + + PQCLEAN_DILITHIUM3_CLEAN_unpack_pk(rho, &t1, pk); + if (PQCLEAN_DILITHIUM3_CLEAN_unpack_sig(c, &z, &h, sig)) { + return -1; + } + if (PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(&z, GAMMA1 - BETA)) { + return -1; + } + + /* Compute CRH(CRH(rho, t1), msg) */ + crh(mu, pk, PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + /* Matrix-vector multiplication; compute Az - c2^dt1 */ + PQCLEAN_DILITHIUM3_CLEAN_poly_challenge(&cp, c); + PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_expand(mat, rho); + + PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(&z); + PQCLEAN_DILITHIUM3_CLEAN_polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + + PQCLEAN_DILITHIUM3_CLEAN_poly_ntt(&cp); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_shiftl(&t1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt(&t1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_pointwise_poly_montgomery(&t1, &cp, &t1); + + PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub(&w1, &w1, &t1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(&w1); + 
PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_tomont(&w1); + + /* Reconstruct w1 */ + PQCLEAN_DILITHIUM3_CLEAN_polyveck_caddq(&w1); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(&w1, &w1, &h); + PQCLEAN_DILITHIUM3_CLEAN_polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify PQCLEAN_DILITHIUM3_CLEAN_challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(c2, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + for (i = 0; i < SEEDBYTES; ++i) { + if (c[i] != c2[i]) { + return -1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_open +* +* Description: Verify signed message. +* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_open(uint8_t *m, + size_t *mlen, + const uint8_t *sm, + size_t smlen, + const uint8_t *pk) { + size_t i; + + if (smlen < PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES) { + goto badsig; + } + + *mlen = smlen - PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES; + if (PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_verify(sm, PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES, sm + PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES, *mlen, pk)) { + goto badsig; + } else { + /* All good, copy msg, return 0 */ + for (i = 0; i < *mlen; ++i) { + m[i] = sm[PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES + i]; + } + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = (size_t) -1; + for (i = 0; i < smlen; ++i) { + m[i] = 0; + } + + return 
-1; +} diff --git a/crypto_sign/dilithium/dilithium3/clean/sign.h b/crypto_sign/dilithium/dilithium3/clean/sign.h new file mode 100644 index 00000000..20336537 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/sign.h @@ -0,0 +1,29 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_SIGN_H +#define PQCLEAN_DILITHIUM3_CLEAN_SIGN_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include +#include + +void PQCLEAN_DILITHIUM3_CLEAN_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium3/clean/symmetric-shake.c b/crypto_sign/dilithium/dilithium3/clean/symmetric-shake.c new file mode 100644 index 00000000..a09dbd64 --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/symmetric-shake.c @@ -0,0 +1,26 @@ +#include "fips202.h" +#include "params.h" +#include "symmetric.h" +#include + +void PQCLEAN_DILITHIUM3_CLEAN_dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void PQCLEAN_DILITHIUM3_CLEAN_dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) 
nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/crypto_sign/dilithium/dilithium3/clean/symmetric.h b/crypto_sign/dilithium/dilithium3/clean/symmetric.h new file mode 100644 index 00000000..dd88beef --- /dev/null +++ b/crypto_sign/dilithium/dilithium3/clean/symmetric.h @@ -0,0 +1,36 @@ +#ifndef PQCLEAN_DILITHIUM3_CLEAN_SYMMETRIC_H +#define PQCLEAN_DILITHIUM3_CLEAN_SYMMETRIC_H +#include "fips202.h" +#include "params.h" +#include + + + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +void PQCLEAN_DILITHIUM3_CLEAN_dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +void PQCLEAN_DILITHIUM3_CLEAN_dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) +#define stream128_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM3_CLEAN_dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE128_RATE), STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM3_CLEAN_dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE256_RATE), STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + +#endif diff --git a/crypto_sign/dilithium/dilithium5/META.yml b/crypto_sign/dilithium/dilithium5/META.yml new file mode 100644 index 00000000..e498d4bc --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/META.yml @@ -0,0 +1,31 @@ +name: Dilithium5 +type: 
signature +claimed-nist-level: 5 +length-public-key: 2592 +length-secret-key: 4880 +length-signature: 4595 +nistkat-sha256: 1d1ee6fb14b864bcc564ad9c416593b2ee1bf93cd65dfe70d9e400bc66be3229 +testvectors-sha256: 9bc663cbfc1b43cff759cfeddd365b665762bc36e1f1d0777ae1196f59617a70 +principal-submitters: + - Vadim Lyubashevsky +auxiliary-submitters: + - Léo Ducas + - Eike Kiltz + - Tancrède Lepoint + - Peter Schwabe + - Gregor Seiler + - Damien Stehlé +implementations: + - name: clean + version: https://github.com/pq-crystals/dilithium/commit/1e63a1e880401166f105ab44ec67464c9714a315 via https://github.com/jschanck/package-pqclean/tree/b158a891/dilithium + - name: avx2 + version: https://github.com/pq-crystals/dilithium/commit/1e63a1e880401166f105ab44ec67464c9714a315 via https://github.com/jschanck/package-pqclean/tree/b158a891/dilithium + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - aes + - avx2 + - popcnt diff --git a/crypto_sign/dilithium/dilithium5/avx2/LICENSE b/crypto_sign/dilithium/dilithium5/avx2/LICENSE new file mode 100644 index 00000000..08473af7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/LICENSE @@ -0,0 +1,5 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/crypto_sign/dilithium/dilithium5/avx2/align.h b/crypto_sign/dilithium/dilithium5/avx2/align.h new file mode 100644 index 00000000..e54cb4a8 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/align.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_ALIGN_H +#define PQCLEAN_DILITHIUM5_AVX2_ALIGN_H + +#include +#include + +#define ALIGNED_UINT8(N) \ + union { \ + uint8_t coeffs[N]; \ + __m256i vec[((N)+31)/32]; \ + } + +#define ALIGNED_INT32(N) \ + union { \ + int32_t coeffs[N]; \ + __m256i vec[((N)+7)/8]; \ + } + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/api.h b/crypto_sign/dilithium/dilithium5/avx2/api.h new file mode 100644 index 00000000..7586ee9c --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/api.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_API_H +#define PQCLEAN_DILITHIUM5_AVX2_API_H + +#include +#include + +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES 2592 +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES 4880 +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES 4595 +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_ALGNAME "Dilithium5" + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/cdecl.h b/crypto_sign/dilithium/dilithium5/avx2/cdecl.h new file mode 100644 index 00000000..e961164d --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/cdecl.h @@ -0,0 +1,24 @@ +#ifndef 
PQCLEAN_DILITHIUM5_AVX2_CDECL_H +#define PQCLEAN_DILITHIUM5_AVX2_CDECL_H + + + +#define _8XQ 0 +#define _8XQINV 8 +#define _8XDIV_QINV 16 +#define _8XDIV 24 +#define _ZETAS_QINV 32 +#define _ZETAS 328 + +/* The C ABI on MacOS exports all symbols with a leading + * underscore. This means that any symbols we refer to from + * C files (functions) can't be found, and all symbols we + * refer to from ASM also can't be found (nttconsts.c). + * + * This define helps us get around this + */ + +#define _cdecl(s) _##s +#define cdecl(s) s + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/consts.c b/crypto_sign/dilithium/dilithium5/avx2/consts.c new file mode 100644 index 00000000..1c4a9828 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/consts.c @@ -0,0 +1,101 @@ +#include "consts.h" +#include "params.h" +#include + +#define QINV 58728449 // q^(-1) mod 2^32 +#define MONT (-4186625) // 2^32 mod q +#define DIV 41978 // mont^2/256 +#define DIV_QINV (-8395782) + +const qdata_t PQCLEAN_DILITHIUM5_AVX2_qdata = {{ +//#define _8XQ 0 + Q, Q, Q, Q, Q, Q, Q, Q, + +//#define _8XQINV 8 + QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, + +//#define _8XDIV_QINV 16 + DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, DIV_QINV, + +//#define _8XDIV 24 + DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV, + +//#define _ZETAS_QINV 32 + -151046689, 1830765815, -1929875198, -1927777021, 1640767044, 1477910808, 1612161320, 1640734244, + 308362795, 308362795, 308362795, 308362795, -1815525077, -1815525077, -1815525077, -1815525077, + -1374673747, -1374673747, -1374673747, -1374673747, -1091570561, -1091570561, -1091570561, -1091570561, + -1929495947, -1929495947, -1929495947, -1929495947, 515185417, 515185417, 515185417, 515185417, + -285697463, -285697463, -285697463, -285697463, 625853735, 625853735, 625853735, 625853735, + 1727305304, 1727305304, 2082316400, 2082316400, -1364982364, -1364982364, 858240904, 858240904, + 1806278032, 1806278032, 222489248, 222489248, 
-346752664, -346752664, 684667771, 684667771, + 1654287830, 1654287830, -878576921, -878576921, -1257667337, -1257667337, -748618600, -748618600, + 329347125, 329347125, 1837364258, 1837364258, -1443016191, -1443016191, -1170414139, -1170414139, + -1846138265, -1631226336, -1404529459, 1838055109, 1594295555, -1076973524, -1898723372, -594436433, + -202001019, -475984260, -561427818, 1797021249, -1061813248, 2059733581, -1661512036, -1104976547, + -1750224323, -901666090, 418987550, 1831915353, -1925356481, 992097815, 879957084, 2024403852, + 1484874664, -1636082790, -285388938, -1983539117, -1495136972, -950076368, -1714807468, -952438995, + -1574918427, 1350681039, -2143979939, 1599739335, -1285853323, -993005454, -1440787840, 568627424, + -783134478, -588790216, 289871779, -1262003603, 2135294594, -1018755525, -889861155, 1665705315, + 1321868265, 1225434135, -1784632064, 666258756, 675310538, -1555941048, -1999506068, -1499481951, + -695180180, -1375177022, 1777179795, 334803717, -178766299, -518252220, 1957047970, 1146323031, + -654783359, -1974159335, 1651689966, 140455867, -1039411342, 1955560694, 1529189038, -2131021878, + -247357819, 1518161567, -86965173, 1708872713, 1787797779, 1638590967, -120646188, -1669960606, + -916321552, 1155548552, 2143745726, 1210558298, -1261461890, -318346816, 628664287, -1729304568, + 1422575624, 1424130038, -1185330464, 235321234, 168022240, 1206536194, 985155484, -894060583, + -898413, -1363460238, -605900043, 2027833504, 14253662, 1014493059, 863641633, 1819892093, + 2124962073, -1223601433, -1920467227, -1637785316, -1536588520, 694382729, 235104446, -1045062172, + 831969619, -300448763, 756955444, -260312805, 1554794072, 1339088280, -2040058690, -853476187, + -2047270596, -1723816713, -1591599803, -440824168, 1119856484, 1544891539, 155290192, -973777462, + 991903578, 912367099, -44694137, 1176904444, -421552614, -818371958, 1747917558, -325927722, + 908452108, 1851023419, -1176751719, -1354528380, -72690498, -314284737, 
985022747, 963438279, + -1078959975, 604552167, -1021949428, 608791570, 173440395, -2126092136, -1316619236, -1039370342, + 6087993, -110126092, 565464272, -1758099917, -1600929361, 879867909, -1809756372, 400711272, + 1363007700, 30313375, -326425360, 1683520342, -517299994, 2027935492, -1372618620, 128353682, + -1123881663, 137583815, -635454918, -642772911, 45766801, 671509323, -2070602178, 419615363, + 1216882040, -270590488, -1276805128, 371462360, -1357098057, -384158533, 827959816, -596344473, + 702390549, -279505433, -260424530, -71875110, -1208667171, -1499603926, 2036925262, -540420426, + 746144248, -1420958686, 2032221021, 1904936414, 1257750362, 1926727420, 1931587462, 1258381762, + 885133339, 1629985060, 1967222129, 6363718, -1287922800, 1136965286, 1779436847, 1116720494, + 1042326957, 1405999311, 713994583, 940195359, -1542497137, 2061661095, -883155599, 1726753853, + -1547952704, 394851342, 283780712, 776003547, 1123958025, 201262505, 1934038751, 374860238, + +//#define _ZETAS 328 + -3975713, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 1826347, 1826347, 1826347, 2353451, 2353451, 2353451, 2353451, + -359251, -359251, -359251, -359251, -2091905, -2091905, -2091905, -2091905, + 3119733, 3119733, 3119733, 3119733, -2884855, -2884855, -2884855, -2884855, + 3111497, 3111497, 3111497, 3111497, 2680103, 2680103, 2680103, 2680103, + 2725464, 2725464, 1024112, 1024112, -1079900, -1079900, 3585928, 3585928, + -549488, -549488, -1119584, -1119584, 2619752, 2619752, -2108549, -2108549, + -2118186, -2118186, -3859737, -3859737, -1399561, -1399561, -3277672, -3277672, + 1757237, 1757237, -19422, -19422, 4010497, 4010497, 280005, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + 
-3930395, -3677745, -1452451, 2176455, -1257611, -4083598, -3190144, -3632928, + 3412210, 2147896, -2967645, -411027, -671102, -22981, -381987, 1852771, + -3343383, 508951, 44288, 904516, -3724342, 1653064, 2389356, 759969, + 189548, 3159746, -2409325, 1315589, 1285669, -812732, -3019102, -3628969, + -1528703, -3041255, 3475950, -1585221, 1939314, -1000202, -3157330, 126922, + -983419, 2715295, -3693493, -2477047, -1228525, -1308169, 1349076, -1430430, + 264944, 3097992, -1100098, 3958618, -8578, -3249728, -210977, -1316856, + -3553272, -1851402, -177440, 1341330, -1584928, -1439742, -3881060, 3839961, + 2091667, -3342478, 266997, -3520352, 900702, 495491, -655327, -3556995, + 342297, 3437287, 2842341, 4055324, -3767016, -2994039, -1333058, -451100, + -1279661, 1500165, -542412, -2584293, -2013608, 1957272, -3183426, 810149, + -3038916, 2213111, -426683, -1667432, -2939036, 183443, -554416, 3937738, + 3407706, 2244091, 2434439, -3759364, 1859098, -1613174, -3122442, -525098, + 286988, -3342277, 2691481, 1247620, 1250494, 1869119, 1237275, 1312455, + 1917081, 777191, -2831860, -3724270, 2432395, 3369112, 162844, 1652634, + 3523897, -975884, 1723600, -1104333, -2235985, -976891, 3919660, 1400424, + 2316500, -2446433, -1235728, -1197226, 909542, -43260, 2031748, -768622, + -2437823, 1735879, -2590150, 2486353, 2635921, 1903435, -3318210, 3306115, + -2546312, 2235880, -1671176, 594136, 2454455, 185531, 1616392, -3694233, + 3866901, 1717735, -1803090, -260646, -420899, 1612842, -48306, -846154, + 3817976, -3562462, 3513181, -3193378, 819034, -522500, 3207046, -3595838, + 4108315, 203044, 1265009, 1595974, -3548272, -1050970, -1430225, -1962642, + -1374803, 3406031, -1846953, -3776993, -164721, -1207385, 3014001, -1799107, + 269760, 472078, 1910376, -3833893, -2286327, -3545687, -1362209, 1976782, + } +}; diff --git a/crypto_sign/dilithium/dilithium5/avx2/consts.h b/crypto_sign/dilithium/dilithium5/avx2/consts.h new file mode 100644 index 00000000..ff251a04 --- /dev/null 
+++ b/crypto_sign/dilithium/dilithium5/avx2/consts.h @@ -0,0 +1,10 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_CONSTS_H +#define PQCLEAN_DILITHIUM5_AVX2_CONSTS_H +#include "align.h" +#include "cdecl.h" + + +typedef ALIGNED_INT32(624) qdata_t; /* constant-table type with 624 int32 entries; ALIGNED_INT32 is supplied by align.h (presumably the same coeffs/__m256i union as in the dilithium2 copy - confirm) */ +extern const qdata_t PQCLEAN_DILITHIUM5_AVX2_qdata; /* declaration only; the table itself is defined in another file */ + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/f1600x4.S b/crypto_sign/dilithium/dilithium5/avx2/f1600x4.S new file mode 100644 index 00000000..a21aab88 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/f1600x4.S @@ -0,0 +1,909 @@ +/* Taken from Bas Westerbaan's new 4-way SHAKE implementation + * for Sphincs+ (https://github.com/sphincs/sphincsplus/pull/14/), + * but uses vpshufb for byte-granular rotations as in the Keccak Code Package. */ + +#include "cdecl.h" + +.data +.p2align 5 +rho8: /* vpshufb control: within each 8-byte group result byte k takes source byte k-1, i.e. every 64-bit lane is rotated left by 8 bits (stands in for the commented-out vpsllq $8 / vpsrlq $56 / vpor triple further down) */ +.byte 7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14,7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14 +rho56: /* vpshufb control: within each 8-byte group result byte k takes source byte k+1, i.e. every 64-bit lane is rotated left by 56 bits (stands in for the commented-out vpsllq $56 / vpsrlq $8 / vpor triple further down) */ +.byte 1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8,1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8 +/* PQCLEAN_DILITHIUM5_AVX2_f1600x4, exported under both the cdecl() and _cdecl() spellings. %rdi is the base of the state: 25 slots of 32 bytes at offsets 0..768, accessed with aligned vmovdqa. %rsi points at 64-bit round constants that are read with vpbroadcastq at 0/8/16/24(%rsi) and advanced by 32 bytes at the bottom of the loop, so each pass consumes four constants; the pass counter in %rax starts at 6. */ +.text +.global cdecl(PQCLEAN_DILITHIUM5_AVX2_f1600x4) +.global _cdecl(PQCLEAN_DILITHIUM5_AVX2_f1600x4) +cdecl(PQCLEAN_DILITHIUM5_AVX2_f1600x4): +_cdecl(PQCLEAN_DILITHIUM5_AVX2_f1600x4): +vmovdqa rho8(%rip), %ymm0 /* ymm0 = rho8 shuffle mask used by the vpshufb %ymm0 rotations */ +movq $6, %rax /* pass counter, decremented just before jnz looptop */ +looptop: +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq
$1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 192(%rdi), %ymm4, %ymm9 +vpxor 384(%rdi), %ymm3, %ymm10 +vpxor 576(%rdi), %ymm2, %ymm11 +vpxor 768(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 0(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 96(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 320(%rdi), %ymm5, %ymm10 +vpxor 512(%rdi), %ymm4, %ymm11 +vpxor 704(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, 
%ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 32(%rdi), %ymm4, %ymm8 +vpxor 224(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 608(%rdi), %ymm1, %ymm11 +vpxor 640(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 128(%rdi), %ymm1, %ymm8 +vpxor 160(%rdi), %ymm5, %ymm9 +vpxor 352(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 736(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 
+vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 64(%rdi), %ymm3, %ymm8 +vpxor 256(%rdi), %ymm2, %ymm9 +vpxor 448(%rdi), %ymm1, %ymm10 +vpxor 480(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 448(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 
480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 512(%rdi), %ymm4, %ymm9 +vpxor 224(%rdi), %ymm3, %ymm10 +vpxor 736(%rdi), %ymm2, %ymm11 +vpxor 448(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 8(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 576(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 640(%rdi), %ymm5, %ymm10 +vpxor 352(%rdi), %ymm4, %ymm11 +vpxor 64(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, 
%ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 192(%rdi), %ymm4, %ymm8 +vpxor 704(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 128(%rdi), %ymm1, %ymm11 +vpxor 480(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 768(%rdi), %ymm1, %ymm8 +vpxor 320(%rdi), %ymm5, %ymm9 +vpxor 32(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 256(%rdi), %ymm2, %ymm12 
+vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 384(%rdi), %ymm3, %ymm8 +vpxor 96(%rdi), %ymm2, %ymm9 +vpxor 608(%rdi), %ymm1, %ymm10 +vpxor 160(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 608(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 
160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 352(%rdi), %ymm4, %ymm9 +vpxor 704(%rdi), %ymm3, %ymm10 +vpxor 256(%rdi), %ymm2, %ymm11 +vpxor 608(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 16(%rsi), %ymm8 +vpxor %ymm8, 
%ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 736(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 480(%rdi), %ymm5, %ymm10 +vpxor 32(%rdi), %ymm4, %ymm11 +vpxor 384(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 512(%rdi), %ymm4, %ymm8 +vpxor 64(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 768(%rdi), %ymm1, %ymm11 +vpxor 160(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, %ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, 
%ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 768(%rdi) +vpxor 448(%rdi), %ymm1, %ymm8 +vpxor 640(%rdi), %ymm5, %ymm9 +vpxor 192(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 96(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 224(%rdi), %ymm3, %ymm8 +vpxor 576(%rdi), %ymm2, %ymm9 +vpxor 128(%rdi), %ymm1, %ymm10 +vpxor 320(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, 
%ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 128(%rdi) +vmovdqa 0(%rdi), %ymm8 +vmovdqa 32(%rdi), %ymm9 +vmovdqa 64(%rdi), %ymm10 +vmovdqa 96(%rdi), %ymm11 +vmovdqa 128(%rdi), %ymm12 +vpxor 160(%rdi), %ymm8, %ymm8 +vpxor 192(%rdi), %ymm9, %ymm9 +vpxor 224(%rdi), %ymm10, %ymm10 +vpxor 256(%rdi), %ymm11, %ymm11 +vpxor 288(%rdi), %ymm12, %ymm12 +vpxor 320(%rdi), %ymm8, %ymm8 +vpxor 352(%rdi), %ymm9, %ymm9 +vpxor 384(%rdi), %ymm10, %ymm10 +vpxor 416(%rdi), %ymm11, %ymm11 +vpxor 448(%rdi), %ymm12, %ymm12 +vpxor 480(%rdi), %ymm8, %ymm8 +vpxor 512(%rdi), %ymm9, %ymm9 +vpxor 544(%rdi), %ymm10, %ymm10 +vpxor 576(%rdi), %ymm11, %ymm11 +vpxor 608(%rdi), %ymm12, %ymm12 +vpxor 640(%rdi), %ymm8, %ymm8 +vpxor 672(%rdi), %ymm9, %ymm9 +vpxor 704(%rdi), %ymm10, %ymm10 +vpxor 736(%rdi), %ymm11, %ymm11 +vpxor 768(%rdi), %ymm12, %ymm12 +vpsllq $1, %ymm9, %ymm13 +vpsllq $1, %ymm10, %ymm14 +vpsllq $1, %ymm11, %ymm15 +vpsllq $1, %ymm12, %ymm7 +vpsllq $1, %ymm8, %ymm6 +vpsrlq $63, %ymm9, %ymm5 +vpsrlq $63, %ymm10, %ymm4 +vpsrlq $63, %ymm11, %ymm3 +vpsrlq $63, %ymm12, %ymm2 +vpsrlq $63, %ymm8, %ymm1 +vpor %ymm13, %ymm5, %ymm5 +vpor %ymm14, %ymm4, %ymm4 +vpor %ymm15, %ymm3, %ymm3 +vpor %ymm7, %ymm2, %ymm2 +vpor %ymm6, %ymm1, %ymm1 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm3, %ymm9, %ymm3 +vpxor %ymm2, %ymm10, %ymm2 +vpxor %ymm1, %ymm11, %ymm1 +vpxor 0(%rdi), %ymm5, %ymm8 +vpxor 32(%rdi), %ymm4, %ymm9 +vpxor 64(%rdi), %ymm3, %ymm10 +vpxor 96(%rdi), %ymm2, %ymm11 +vpxor 128(%rdi), %ymm1, %ymm12 +vpsllq $44, %ymm9, %ymm14 +vpsllq $43, %ymm10, %ymm15 +vpsllq $21, %ymm11, %ymm7 +vpsllq $14, %ymm12, %ymm6 +vpsrlq $20, %ymm9, %ymm9 +vpsrlq $21, %ymm10, %ymm10 +vpsrlq $43, %ymm11, %ymm11 +vpsrlq $50, %ymm12, %ymm12 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn 
%ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vpbroadcastq 24(%rsi), %ymm8 +vpxor %ymm8, %ymm13, %ymm13 +vmovdqa %ymm13, 0(%rdi) +vmovdqa %ymm14, 32(%rdi) +vmovdqa %ymm15, 64(%rdi) +vmovdqa %ymm7, 96(%rdi) +vmovdqa %ymm6, 128(%rdi) +vpxor 256(%rdi), %ymm2, %ymm8 +vpxor 288(%rdi), %ymm1, %ymm9 +vpxor 160(%rdi), %ymm5, %ymm10 +vpxor 192(%rdi), %ymm4, %ymm11 +vpxor 224(%rdi), %ymm3, %ymm12 +vpsllq $28, %ymm8, %ymm13 +vpsllq $20, %ymm9, %ymm14 +vpsllq $3, %ymm10, %ymm15 +vpsllq $45, %ymm11, %ymm7 +vpsllq $61, %ymm12, %ymm6 +vpsrlq $36, %ymm8, %ymm8 +vpsrlq $44, %ymm9, %ymm9 +vpsrlq $61, %ymm10, %ymm10 +vpsrlq $19, %ymm11, %ymm11 +vpsrlq $3, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 160(%rdi) +vmovdqa %ymm14, 192(%rdi) +vmovdqa %ymm15, 224(%rdi) +vmovdqa %ymm7, 256(%rdi) +vmovdqa %ymm6, 288(%rdi) +vpxor 352(%rdi), %ymm4, %ymm8 +vpxor 384(%rdi), %ymm3, %ymm9 +vpxor 416(%rdi), %ymm2, %ymm10 +vpxor 448(%rdi), %ymm1, %ymm11 +vpxor 320(%rdi), %ymm5, %ymm12 +vpsllq $1, %ymm8, %ymm13 +vpsllq $6, %ymm9, %ymm14 +vpsllq $25, %ymm10, %ymm15 +#vpsllq $8, %ymm11, %ymm7 +vpsllq $18, %ymm12, %ymm6 +vpsrlq $63, %ymm8, %ymm8 +vpsrlq $58, %ymm9, %ymm9 +vpsrlq $39, %ymm10, %ymm10 +#vpsrlq $56, %ymm11, %ymm11 +vpsrlq $46, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +#vpor %ymm7, %ymm11, 
%ymm11 +vpshufb %ymm0, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 320(%rdi) +vmovdqa %ymm14, 352(%rdi) +vmovdqa %ymm15, 384(%rdi) +vmovdqa %ymm7, 416(%rdi) +vmovdqa %ymm6, 448(%rdi) +vpxor 608(%rdi), %ymm1, %ymm8 +vpxor 480(%rdi), %ymm5, %ymm9 +vpxor 512(%rdi), %ymm4, %ymm10 +vpxor 544(%rdi), %ymm3, %ymm11 +vpxor 576(%rdi), %ymm2, %ymm12 +vpsllq $27, %ymm8, %ymm13 +vpsllq $36, %ymm9, %ymm14 +vpsllq $10, %ymm10, %ymm15 +vpsllq $15, %ymm11, %ymm7 +#vpsllq $56, %ymm12, %ymm6 +vpsrlq $37, %ymm8, %ymm8 +vpsrlq $28, %ymm9, %ymm9 +vpsrlq $54, %ymm10, %ymm10 +vpsrlq $49, %ymm11, %ymm11 +#vpsrlq $8, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, %ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +#vpor %ymm6, %ymm12, %ymm12 +vpshufb rho56(%rip), %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 480(%rdi) +vmovdqa %ymm14, 512(%rdi) +vmovdqa %ymm15, 544(%rdi) +vmovdqa %ymm7, 576(%rdi) +vmovdqa %ymm6, 608(%rdi) +vpxor 704(%rdi), %ymm3, %ymm8 +vpxor 736(%rdi), %ymm2, %ymm9 +vpxor 768(%rdi), %ymm1, %ymm10 +vpxor 640(%rdi), %ymm5, %ymm11 +vpxor 672(%rdi), %ymm4, %ymm12 +vpsllq $62, %ymm8, %ymm13 +vpsllq $55, %ymm9, %ymm14 +vpsllq $39, %ymm10, %ymm15 +vpsllq $41, %ymm11, %ymm7 +vpsllq $2, %ymm12, %ymm6 +vpsrlq $2, %ymm8, %ymm8 +vpsrlq $9, %ymm9, %ymm9 +vpsrlq $25, %ymm10, %ymm10 +vpsrlq $23, %ymm11, %ymm11 +vpsrlq $62, %ymm12, %ymm12 +vpor %ymm13, %ymm8, %ymm8 +vpor %ymm14, 
%ymm9, %ymm9 +vpor %ymm15, %ymm10, %ymm10 +vpor %ymm7, %ymm11, %ymm11 +vpor %ymm6, %ymm12, %ymm12 +vpandn %ymm10, %ymm9, %ymm13 +vpandn %ymm11, %ymm10, %ymm14 +vpandn %ymm12, %ymm11, %ymm15 +vpandn %ymm8, %ymm12, %ymm7 +vpandn %ymm9, %ymm8, %ymm6 +vpxor %ymm8, %ymm13, %ymm13 +vpxor %ymm9, %ymm14, %ymm14 +vpxor %ymm10, %ymm15, %ymm15 +vpxor %ymm11, %ymm7, %ymm7 +vpxor %ymm12, %ymm6, %ymm6 +vmovdqa %ymm13, 640(%rdi) +vmovdqa %ymm14, 672(%rdi) +vmovdqa %ymm15, 704(%rdi) +vmovdqa %ymm7, 736(%rdi) +vmovdqa %ymm6, 768(%rdi) +addq $32, %rsi /* step past the four 8-byte round constants read at 0/8/16/24(%rsi) during this pass */ +subq $1, %rax +jnz looptop /* loop until the counter loaded before looptop reaches zero */ +ret diff --git a/crypto_sign/dilithium/dilithium5/avx2/fips202x4.c b/crypto_sign/dilithium/dilithium5/avx2/fips202x4.c new file mode 100644 index 00000000..6636b507 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/fips202x4.c @@ -0,0 +1,219 @@ +#include "fips202.h" +#include "fips202x4.h" +#include +#include +#include +#include + +#define NROUNDS 24 /* number of entries in the round-constant table below */ + +/* Keccak round constants */ +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + (uint64_t)0x0000000000000001ULL, + (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, + (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, + (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, + (uint64_t)0x000000008000000aULL, + (uint64_t)0x000000008000808bULL, + (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, + (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, + (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, + (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008008ULL +}; +/* Absorbs four messages of inlen bytes each into a freshly zeroed 4-way state s in a single call and applies the padding. r is the rate in bytes (the loops step in units of r / 8 64-bit words), in0..in3 feed 64-bit elements 0..3 of every state vector, p is the padding byte XORed in right after the message. The permutation runs after every full rate block; the final padded block is left in s without a trailing permutation. */ +static void keccakx4_absorb_once(__m256i s[25], + unsigned int r, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2,
+ const uint8_t *in3, + size_t inlen, + uint8_t p) { + size_t i; + uint64_t pos = 0; /* byte offset into the inputs, shared by all four */ + __m256i t, idx; + /* Start from an all-zero state. */ + for (i = 0; i < 25; ++i) { + s[i] = _mm256_setzero_si256(); + } + /* Element k of idx carries the address of in_k. The gathers below pass pos as the base operand with scale 1, so element k is loaded as 8 bytes from in_k offset by pos. */ + idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0); + while (inlen >= r) { + for (i = 0; i < r / 8; ++i) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + s[i] = _mm256_xor_si256(s[i], t); + pos += 8; + } + inlen -= r; + /* A full rate block has been XORed in: permute. */ + PQCLEAN_DILITHIUM5_AVX2_f1600x4(s, KeccakF_RoundConstants); + } + /* Whole 64-bit words of the final, partial block. */ + for (i = 0; i < inlen / 8; ++i) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + s[i] = _mm256_xor_si256(s[i], t); + pos += 8; + } + inlen -= 8 * i; + /* 1 to 7 trailing bytes. NOTE(review): the gather still loads a full 8 bytes per element, i.e. up to 7 bytes beyond the end of each input, and the surplus is only masked off afterwards - confirm that callers' buffers tolerate this over-read. */ + if (inlen) { + t = _mm256_i64gather_epi64((long long *)pos, idx, 1); + idx = _mm256_set1_epi64x((long long)((1ULL << (8 * inlen)) - 1)); /* idx is reused as a mask keeping the low inlen bytes */ + t = _mm256_and_si256(t, idx); + s[i] = _mm256_xor_si256(s[i], t); + } + /* Padding: XOR p into the byte that follows the last message byte inside word i, then flip bit 63 of word r / 8 - 1, the top bit of the last byte of the rate block. */ + t = _mm256_set1_epi64x((uint64_t)p << 8 * inlen); + s[i] = _mm256_xor_si256(s[i], t); + t = _mm256_set1_epi64x((long long)(1ULL << 63)); + s[r / 8 - 1] = _mm256_xor_si256(s[r / 8 - 1], t); +} +/* Squeezes nblocks blocks of r bytes for each of the four instances. Every block is preceded by a permutation of s; the first r / 8 state words are then written out, 64-bit elements 0..3 of each vector going to out0..out3. Each output pointer is advanced by r per block, so it must have room for nblocks * r bytes. */ +static void keccakx4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + unsigned int r, + __m256i s[25]) { + unsigned int i; + __m128d t; + + while (nblocks > 0) { + PQCLEAN_DILITHIUM5_AVX2_f1600x4(s, KeccakF_RoundConstants); + for (i = 0; i < r / 8; ++i) { + t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i])); /* low 128 bits: elements 0 and 1 */ + _mm_storel_pd((double *)&out0[8 * i], t); + _mm_storeh_pd((double *)&out1[8 * i], t); + t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1)); /* high 128 bits: elements 2 and 3 */ + _mm_storel_pd((double *)&out2[8 * i], t); + _mm_storeh_pd((double *)&out3[8 * i], t); + } + + out0 += r; + out1 += r; + out2 += r; + out3 += r; + --nblocks; + } +} +/* SHAKE128 x4: one-shot absorb of four inlen-byte inputs into state with rate SHAKE128_RATE and padding byte 0x1F. The state is zeroed first, so this cannot be used to continue an earlier absorb. */ +void PQCLEAN_DILITHIUM5_AVX2_shake128x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + keccakx4_absorb_once(state->s, SHAKE128_RATE, in0,
in1, in2, in3, inlen, 0x1F); +} + +void PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) { + keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, state->s); +} + +void PQCLEAN_DILITHIUM5_AVX2_shake256x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + keccakx4_absorb_once(state->s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F); +} + +void PQCLEAN_DILITHIUM5_AVX2_shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state) { + keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, state->s); +} + +void PQCLEAN_DILITHIUM5_AVX2_shake128x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + unsigned int i; + size_t nblocks = outlen / SHAKE128_RATE; + uint8_t t[4][SHAKE128_RATE]; + keccakx4_state state; + + PQCLEAN_DILITHIUM5_AVX2_shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen); + PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); + + out0 += nblocks * SHAKE128_RATE; + out1 += nblocks * SHAKE128_RATE; + out2 += nblocks * SHAKE128_RATE; + out3 += nblocks * SHAKE128_RATE; + outlen -= nblocks * SHAKE128_RATE; + + if (outlen) { + PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); + for (i = 0; i < outlen; ++i) { + out0[i] = t[0][i]; + out1[i] = t[1][i]; + out2[i] = t[2][i]; + out3[i] = t[3][i]; + } + } +} + +void PQCLEAN_DILITHIUM5_AVX2_shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen) { + unsigned int 
i; + size_t nblocks = outlen / SHAKE256_RATE; + uint8_t t[4][SHAKE256_RATE]; + keccakx4_state state; + + PQCLEAN_DILITHIUM5_AVX2_shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen); + PQCLEAN_DILITHIUM5_AVX2_shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); + + out0 += nblocks * SHAKE256_RATE; + out1 += nblocks * SHAKE256_RATE; + out2 += nblocks * SHAKE256_RATE; + out3 += nblocks * SHAKE256_RATE; + outlen -= nblocks * SHAKE256_RATE; + + if (outlen) { + PQCLEAN_DILITHIUM5_AVX2_shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); + for (i = 0; i < outlen; ++i) { + out0[i] = t[0][i]; + out1[i] = t[1][i]; + out2[i] = t[2][i]; + out3[i] = t[3][i]; + } + } +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/fips202x4.h b/crypto_sign/dilithium/dilithium5/avx2/fips202x4.h new file mode 100644 index 00000000..826688a3 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/fips202x4.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_FIPS202X4_H +#define PQCLEAN_DILITHIUM5_AVX2_FIPS202X4_H + +#include +#include +#include + +typedef struct { + __m256i s[25]; +} keccakx4_state; + +void PQCLEAN_DILITHIUM5_AVX2_f1600x4(__m256i *s, const uint64_t *rc); + +void PQCLEAN_DILITHIUM5_AVX2_shake128x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +void PQCLEAN_DILITHIUM5_AVX2_shake256x4_absorb_once(keccakx4_state *state, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM5_AVX2_shake256x4_squeezeblocks(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t nblocks, + keccakx4_state *state); + +void PQCLEAN_DILITHIUM5_AVX2_shake128x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t 
*out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +void PQCLEAN_DILITHIUM5_AVX2_shake256x4(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + size_t outlen, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + size_t inlen); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/invntt.S b/crypto_sign/dilithium/dilithium5/avx2/invntt.S new file mode 100644 index 00000000..0ed38a11 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/invntt.S @@ -0,0 +1,240 @@ +#include "cdecl.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpsubd %ymm\l,%ymm\h,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vpmuldq %ymm\zl0,%ymm12,%ymm13 +vmovshdup %ymm12,%ymm\h +vpmuldq %ymm\zl1,%ymm\h,%ymm14 + +vpmuldq %ymm\zh0,%ymm12,%ymm12 +vpmuldq %ymm\zh1,%ymm\h,%ymm\h + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vpsubd %ymm13,%ymm12,%ymm12 +vpsubd %ymm14,%ymm\h,%ymm\h + +vmovshdup %ymm12,%ymm12 +vpblendd $0xAA,%ymm\h,%ymm12,%ymm\h +.endm + +.macro levels0t5 off +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +/* level 0 */ +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,5,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 6,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-72)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,9,1,3,2,15 + +vpermq 
$0x1B,(_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+296-8*\off-104)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 10,11,1,3,2,15 + +/* level 1 */ +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,6,1,3,2,15 +butterfly 5,7,1,3,2,15 + +vpermq $0x1B,(_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+168-8*\off-40)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 8,10,1,3,2,15 +butterfly 9,11,1,3,2,15 + +/* level 2 */ +vpermq $0x1B,(_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3 +vpermq $0x1B,(_ZETAS+104-8*\off-8)*4(%rsi),%ymm15 +vmovshdup %ymm3,%ymm1 +vmovshdup %ymm15,%ymm2 +butterfly 4,8,1,3,2,15 +butterfly 5,9,1,3,2,15 +butterfly 6,10,1,3,2,15 +butterfly 7,11,1,3,2,15 + +/* level 3 */ +shuffle2 4,5,3,5 +shuffle2 6,7,4,7 +shuffle2 8,9,6,9 +shuffle2 10,11,8,11 + +vpermq $0x1B,(_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+72-8*\off-8)*4(%rsi),%ymm2 +butterfly 3,5 +butterfly 4,7 +butterfly 6,9 +butterfly 8,11 + +/* level 4 */ +shuffle4 3,4,10,4 +shuffle4 6,8,3,8 +shuffle4 5,7,6,7 +shuffle4 9,11,5,11 + +vpermq $0x1B,(_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1 +vpermq $0x1B,(_ZETAS+40-8*\off-8)*4(%rsi),%ymm2 +butterfly 10,4 +butterfly 3,8 +butterfly 6,7 +butterfly 5,11 + +/* level 5 */ +shuffle8 10,3,9,3 +shuffle8 6,5,10,5 +shuffle8 4,8,6,8 +shuffle8 7,11,4,11 + +vpbroadcastd (_ZETAS_QINV+7-\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+7-\off)*4(%rsi),%ymm2 +butterfly 9,3 +butterfly 10,5 +butterfly 6,8 +butterfly 4,11 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm10,256*\off+ 32(%rdi) +vmovdqa %ymm6,256*\off+ 64(%rdi) +vmovdqa %ymm4,256*\off+ 96(%rdi) +vmovdqa %ymm3,256*\off+128(%rdi) +vmovdqa %ymm5,256*\off+160(%rdi) +vmovdqa %ymm8,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.macro levels6t7 off +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 
128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +/* level 6 */ +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+3)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +/* level 7 */ +vpbroadcastd (_ZETAS_QINV+0)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+0)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) + +vmovdqa (_8XDIV_QINV)*4(%rsi),%ymm1 +vmovdqa (_8XDIV)*4(%rsi),%ymm2 +vpmuldq %ymm1,%ymm4,%ymm12 +vpmuldq %ymm1,%ymm5,%ymm13 +vmovshdup %ymm4,%ymm8 +vmovshdup %ymm5,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm4,%ymm4 +vpmuldq %ymm2,%ymm5,%ymm5 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm4,%ymm4 +vpsubd %ymm13,%ymm5,%ymm5 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm4,%ymm4 +vmovshdup %ymm5,%ymm5 +vpblendd $0xAA,%ymm8,%ymm4,%ymm4 +vpblendd $0xAA,%ymm9,%ymm5,%ymm5 + +vpmuldq %ymm1,%ymm6,%ymm12 +vpmuldq %ymm1,%ymm7,%ymm13 +vmovshdup %ymm6,%ymm8 +vmovshdup %ymm7,%ymm9 +vpmuldq %ymm1,%ymm8,%ymm14 +vpmuldq %ymm1,%ymm9,%ymm15 +vpmuldq %ymm2,%ymm6,%ymm6 +vpmuldq %ymm2,%ymm7,%ymm7 +vpmuldq %ymm2,%ymm8,%ymm8 +vpmuldq %ymm2,%ymm9,%ymm9 +vpmuldq %ymm0,%ymm12,%ymm12 +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 +vpmuldq %ymm0,%ymm15,%ymm15 +vpsubd %ymm12,%ymm6,%ymm6 +vpsubd %ymm13,%ymm7,%ymm7 +vpsubd %ymm14,%ymm8,%ymm8 +vpsubd %ymm15,%ymm9,%ymm9 +vmovshdup %ymm6,%ymm6 +vmovshdup %ymm7,%ymm7 +vpblendd 
$0xAA,%ymm8,%ymm6,%ymm6 +vpblendd $0xAA,%ymm9,%ymm7,%ymm7 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +.endm + +.text +.global cdecl(PQCLEAN_DILITHIUM5_AVX2_invntt_avx) +.global _cdecl(PQCLEAN_DILITHIUM5_AVX2_invntt_avx) +cdecl(PQCLEAN_DILITHIUM5_AVX2_invntt_avx): +_cdecl(PQCLEAN_DILITHIUM5_AVX2_invntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t5 0 +levels0t5 1 +levels0t5 2 +levels0t5 3 + +levels6t7 0 +levels6t7 1 +levels6t7 2 +levels6t7 3 + +ret diff --git a/crypto_sign/dilithium/dilithium5/avx2/ntt.S b/crypto_sign/dilithium/dilithium5/avx2/ntt.S new file mode 100644 index 00000000..971cc84b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/ntt.S @@ -0,0 +1,199 @@ +#include "cdecl.h" +.include "shuffle.inc" + +.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2 +vpmuldq %ymm\zl0,%ymm\h,%ymm13 +vmovshdup %ymm\h,%ymm12 +vpmuldq %ymm\zl1,%ymm12,%ymm14 + +vpmuldq %ymm\zh0,%ymm\h,%ymm\h +vpmuldq %ymm\zh1,%ymm12,%ymm12 + +vpmuldq %ymm0,%ymm13,%ymm13 +vpmuldq %ymm0,%ymm14,%ymm14 + +vmovshdup %ymm\h,%ymm\h +vpblendd $0xAA,%ymm12,%ymm\h,%ymm\h + +vpsubd %ymm\h,%ymm\l,%ymm12 +vpaddd %ymm\h,%ymm\l,%ymm\l + +vmovshdup %ymm13,%ymm13 +vpblendd $0xAA,%ymm14,%ymm13,%ymm13 + +vpaddd %ymm13,%ymm12,%ymm\h +vpsubd %ymm13,%ymm\l,%ymm\l +.endm + +.macro levels0t1 off +/* level 0 */ +vpbroadcastd (_ZETAS_QINV+1)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+1)*4(%rsi),%ymm2 + +vmovdqa 0+32*\off(%rdi),%ymm4 +vmovdqa 128+32*\off(%rdi),%ymm5 +vmovdqa 256+32*\off(%rdi),%ymm6 +vmovdqa 384+32*\off(%rdi),%ymm7 +vmovdqa 512+32*\off(%rdi),%ymm8 +vmovdqa 640+32*\off(%rdi),%ymm9 +vmovdqa 768+32*\off(%rdi),%ymm10 +vmovdqa 896+32*\off(%rdi),%ymm11 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +/* level 1 */ +vpbroadcastd (_ZETAS_QINV+2)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+2)*4(%rsi),%ymm2 +butterfly 4,6 +butterfly 5,7 + +vpbroadcastd (_ZETAS_QINV+3)*4(%rsi),%ymm1 +vpbroadcastd 
(_ZETAS+3)*4(%rsi),%ymm2 +butterfly 8,10 +butterfly 9,11 + +vmovdqa %ymm4, 0+32*\off(%rdi) +vmovdqa %ymm5,128+32*\off(%rdi) +vmovdqa %ymm6,256+32*\off(%rdi) +vmovdqa %ymm7,384+32*\off(%rdi) +vmovdqa %ymm8,512+32*\off(%rdi) +vmovdqa %ymm9,640+32*\off(%rdi) +vmovdqa %ymm10,768+32*\off(%rdi) +vmovdqa %ymm11,896+32*\off(%rdi) +.endm + +.macro levels2t7 off +/* level 2 */ +vmovdqa 256*\off+ 0(%rdi),%ymm4 +vmovdqa 256*\off+ 32(%rdi),%ymm5 +vmovdqa 256*\off+ 64(%rdi),%ymm6 +vmovdqa 256*\off+ 96(%rdi),%ymm7 +vmovdqa 256*\off+128(%rdi),%ymm8 +vmovdqa 256*\off+160(%rdi),%ymm9 +vmovdqa 256*\off+192(%rdi),%ymm10 +vmovdqa 256*\off+224(%rdi),%ymm11 + +vpbroadcastd (_ZETAS_QINV+4+\off)*4(%rsi),%ymm1 +vpbroadcastd (_ZETAS+4+\off)*4(%rsi),%ymm2 + +butterfly 4,8 +butterfly 5,9 +butterfly 6,10 +butterfly 7,11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +/* level 3 */ +vmovdqa (_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+8+8*\off)*4(%rsi),%ymm2 + +butterfly 3,5 +butterfly 8,10 +butterfly 4,6 +butterfly 9,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +/* level 4 */ +vmovdqa (_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+40+8*\off)*4(%rsi),%ymm2 + +butterfly 7,8 +butterfly 5,6 +butterfly 3,4 +butterfly 10,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +/* level 5 */ +vmovdqa (_ZETAS_QINV+72+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+72+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 + +butterfly 9,5,1,10,2,15 +butterfly 8,4,1,10,2,15 +butterfly 7,3,1,10,2,15 +butterfly 6,11,1,10,2,15 + +/* level 6 */ +vmovdqa (_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,7,1,10,2,15 +butterfly 8,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+104+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 
5,3,1,10,2,15 +butterfly 4,11,1,10,2,15 + +/* level 7 */ +vmovdqa (_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 9,8,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+32)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 7,6,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+64)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 5,4,1,10,2,15 + +vmovdqa (_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1 +vmovdqa (_ZETAS+168+8*\off+96)*4(%rsi),%ymm2 +vpsrlq $32,%ymm1,%ymm10 +vmovshdup %ymm2,%ymm15 +butterfly 3,11,1,10,2,15 + +vmovdqa %ymm9,256*\off+ 0(%rdi) +vmovdqa %ymm8,256*\off+ 32(%rdi) +vmovdqa %ymm7,256*\off+ 64(%rdi) +vmovdqa %ymm6,256*\off+ 96(%rdi) +vmovdqa %ymm5,256*\off+128(%rdi) +vmovdqa %ymm4,256*\off+160(%rdi) +vmovdqa %ymm3,256*\off+192(%rdi) +vmovdqa %ymm11,256*\off+224(%rdi) +.endm + +.text +.global cdecl(PQCLEAN_DILITHIUM5_AVX2_ntt_avx) +.global _cdecl(PQCLEAN_DILITHIUM5_AVX2_ntt_avx) +cdecl(PQCLEAN_DILITHIUM5_AVX2_ntt_avx): +_cdecl(PQCLEAN_DILITHIUM5_AVX2_ntt_avx): +vmovdqa _8XQ*4(%rsi),%ymm0 + +levels0t1 0 +levels0t1 1 +levels0t1 2 +levels0t1 3 + +levels2t7 0 +levels2t7 1 +levels2t7 2 +levels2t7 3 + +ret + diff --git a/crypto_sign/dilithium/dilithium5/avx2/ntt.h b/crypto_sign/dilithium/dilithium5/avx2/ntt.h new file mode 100644 index 00000000..98ac1f21 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/ntt.h @@ -0,0 +1,14 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_NTT_H +#define PQCLEAN_DILITHIUM5_AVX2_NTT_H + +#include + +void PQCLEAN_DILITHIUM5_AVX2_ntt_avx(__m256i *a, const __m256i *PQCLEAN_DILITHIUM5_AVX2_qdata); +void PQCLEAN_DILITHIUM5_AVX2_invntt_avx(__m256i *a, const __m256i *PQCLEAN_DILITHIUM5_AVX2_qdata); + +void PQCLEAN_DILITHIUM5_AVX2_nttunpack_avx(__m256i *a); + +void PQCLEAN_DILITHIUM5_AVX2_pointwise_avx(__m256i *c, const 
__m256i *a, const __m256i *b, const __m256i *PQCLEAN_DILITHIUM5_AVX2_qdata); +void PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *PQCLEAN_DILITHIUM5_AVX2_qdata); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/packing.c b/crypto_sign/dilithium/dilithium5/avx2/packing.c new file mode 100644 index 00000000..2aeec579 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/packing.c @@ -0,0 +1,261 @@ +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" + + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + pk[i] = rho[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyt1_pack(pk + i * POLYT1_PACKEDBYTES, &t1->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_unpack_pk +* +* Description: Unpack public key pk = (rho, t1). 
+* +* Arguments: - const uint8_t rho[]: output byte array for rho +* - const polyveck *t1: pointer to output vector t1 +* - uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = pk[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyt1_unpack(&t1->vec[i], pk + i * POLYT1_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_pack_sk +* +* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0). +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = rho[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = key[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + sk[i] = tr[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s1->vec[i]); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s2->vec[i]); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { +
PQCLEAN_DILITHIUM5_AVX2_polyt0_pack(sk + i * POLYT0_PACKEDBYTES, &t0->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_unpack_sk +* +* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0). +* +* Arguments: - uint8_t rho[]: output byte array for rho +* - uint8_t tr[]: output byte array for tr +* - uint8_t key[]: output byte array for key +* - polyveck *t0: pointer to output vector t0 +* - polyvecl *s1: pointer to output vector s1 +* - polyveck *s2: pointer to output vector s2 +* - const uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + key[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + tr[i] = sk[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyeta_unpack(&s1->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyeta_unpack(&s2->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyt0_unpack(&t0->vec[i], sk + i * POLYT0_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_pack_sig +* +* Description: Bit-pack signature sig = (c, z, h).
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to PQCLEAN_DILITHIUM5_AVX2_challenge hash length SEEDBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES], + const uint8_t c[SEEDBYTES], + const polyvecl *z, + const polyveck *h) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + sig[i] = c[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Encode h */ + for (i = 0; i < OMEGA + K; ++i) { + sig[i] = 0; + } + + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + if (h->vec[i].coeffs[j] != 0) { + sig[k++] = (uint8_t) j; + } + } + + sig[OMEGA + i] = (uint8_t) k; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output PQCLEAN_DILITHIUM5_AVX2_challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0. 
+**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_unpack_sig(uint8_t c[SEEDBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES]) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + c[i] = sig[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + h->vec[i].coeffs[j] = 0; + } + + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { + return 1; + } + + for (j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) { + return 1; + } + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (j = k; j < OMEGA; ++j) { + if (sig[j]) { + return 1; + } + } + + return 0; +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/packing.h b/crypto_sign/dilithium/dilithium5/avx2/packing.h new file mode 100644 index 00000000..d69bac5a --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/packing.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_PACKING_H +#define PQCLEAN_DILITHIUM5_AVX2_PACKING_H +#include "params.h" +#include "polyvec.h" +#include + +void PQCLEAN_DILITHIUM5_AVX2_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +void PQCLEAN_DILITHIUM5_AVX2_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +void PQCLEAN_DILITHIUM5_AVX2_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); + +void 
PQCLEAN_DILITHIUM5_AVX2_unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES]); + +void PQCLEAN_DILITHIUM5_AVX2_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES]); + +int PQCLEAN_DILITHIUM5_AVX2_unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES]); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/params.h b/crypto_sign/dilithium/dilithium5/avx2/params.h new file mode 100644 index 00000000..70ddfabd --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/params.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_PARAMS_H +#define PQCLEAN_DILITHIUM5_AVX2_PARAMS_H + + + +#define SEEDBYTES 32 +#define CRHBYTES 48 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_ALGNAME "Dilithium5" + + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#define POLYZ_PACKEDBYTES 640 + +#define POLYW1_PACKEDBYTES 128 + +#define POLYETA_PACKEDBYTES 96 + +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + CRHBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES (SEEDBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/pointwise.S b/crypto_sign/dilithium/dilithium5/avx2/pointwise.S new file mode 100644 index 00000000..73c0589e --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/pointwise.S @@ 
-0,0 +1,205 @@ +#include "params.h" +#include "cdecl.h" + +.text +.global cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_avx) +.global _cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_avx) +cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_avx): +_cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop1: +#load +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa 64(%rsi),%ymm6 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vmovdqa 64(%rdx),%ymm14 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm6,%ymm7 +vpsrlq $32,%ymm10,%ymm11 +vpsrlq $32,%ymm12,%ymm13 +vmovshdup %ymm14,%ymm15 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq %ymm5,%ymm13,%ymm5 +vpmuldq %ymm6,%ymm14,%ymm6 +vpmuldq %ymm7,%ymm15,%ymm7 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm0,%ymm6,%ymm14 +vpmuldq %ymm0,%ymm7,%ymm15 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpmuldq %ymm1,%ymm14,%ymm14 +vpmuldq %ymm1,%ymm15,%ymm15 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsubq %ymm14,%ymm6,%ymm6 +vpsubq %ymm15,%ymm7,%ymm7 +vpsrlq $32,%ymm2,%ymm2 +vpsrlq $32,%ymm4,%ymm4 +vmovshdup %ymm6,%ymm6 + +#store +vpblendd $0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 +vpblendd $0xAA,%ymm7,%ymm6,%ymm6 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) +vmovdqa %ymm6,64(%rdi) + +add $96,%rdi +add $96,%rsi +add $96,%rdx +add $1,%eax +cmp $10,%eax +jb _looptop1 + +vmovdqa (%rsi),%ymm2 +vmovdqa 32(%rsi),%ymm4 +vmovdqa (%rdx),%ymm10 +vmovdqa 32(%rdx),%ymm12 +vpsrlq $32,%ymm2,%ymm3 +vpsrlq $32,%ymm4,%ymm5 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm2,%ymm10,%ymm2 +vpmuldq %ymm3,%ymm11,%ymm3 +vpmuldq %ymm4,%ymm12,%ymm4 +vpmuldq 
%ymm5,%ymm13,%ymm5 + +#reduce +vpmuldq %ymm0,%ymm2,%ymm10 +vpmuldq %ymm0,%ymm3,%ymm11 +vpmuldq %ymm0,%ymm4,%ymm12 +vpmuldq %ymm0,%ymm5,%ymm13 +vpmuldq %ymm1,%ymm10,%ymm10 +vpmuldq %ymm1,%ymm11,%ymm11 +vpmuldq %ymm1,%ymm12,%ymm12 +vpmuldq %ymm1,%ymm13,%ymm13 +vpsubq %ymm10,%ymm2,%ymm2 +vpsubq %ymm11,%ymm3,%ymm3 +vpsubq %ymm12,%ymm4,%ymm4 +vpsubq %ymm13,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd $0x55,%ymm2,%ymm3,%ymm2 +vpblendd $0x55,%ymm4,%ymm5,%ymm4 +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +ret + +.macro pointwise off +#load +vmovdqa \off(%rsi),%ymm6 +vmovdqa \off+32(%rsi),%ymm8 +vmovdqa \off(%rdx),%ymm10 +vmovdqa \off+32(%rdx),%ymm12 +vpsrlq $32,%ymm6,%ymm7 +vpsrlq $32,%ymm8,%ymm9 +vmovshdup %ymm10,%ymm11 +vmovshdup %ymm12,%ymm13 + +#mul +vpmuldq %ymm6,%ymm10,%ymm6 +vpmuldq %ymm7,%ymm11,%ymm7 +vpmuldq %ymm8,%ymm12,%ymm8 +vpmuldq %ymm9,%ymm13,%ymm9 +.endm + +.macro acc +vpaddq %ymm6,%ymm2,%ymm2 +vpaddq %ymm7,%ymm3,%ymm3 +vpaddq %ymm8,%ymm4,%ymm4 +vpaddq %ymm9,%ymm5,%ymm5 +.endm + +.global cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx) +.global _cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx) +cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx): +_cdecl(PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx): +#consts +vmovdqa _8XQINV*4(%rcx),%ymm0 +vmovdqa _8XQ*4(%rcx),%ymm1 + +xor %eax,%eax +_looptop2: +pointwise 0 + +#mov +vmovdqa %ymm6,%ymm2 +vmovdqa %ymm7,%ymm3 +vmovdqa %ymm8,%ymm4 +vmovdqa %ymm9,%ymm5 + +pointwise 1024 +acc + +pointwise 2048 +acc + +pointwise 3072 +acc + +pointwise 4096 +acc + +pointwise 5120 +acc + +pointwise 6144 +acc + +#reduce +vpmuldq %ymm0,%ymm2,%ymm6 +vpmuldq %ymm0,%ymm3,%ymm7 +vpmuldq %ymm0,%ymm4,%ymm8 +vpmuldq %ymm0,%ymm5,%ymm9 +vpmuldq %ymm1,%ymm6,%ymm6 +vpmuldq %ymm1,%ymm7,%ymm7 +vpmuldq %ymm1,%ymm8,%ymm8 +vpmuldq %ymm1,%ymm9,%ymm9 +vpsubq %ymm6,%ymm2,%ymm2 +vpsubq %ymm7,%ymm3,%ymm3 +vpsubq %ymm8,%ymm4,%ymm4 +vpsubq %ymm9,%ymm5,%ymm5 +vpsrlq $32,%ymm2,%ymm2 +vmovshdup %ymm4,%ymm4 + +#store +vpblendd 
$0xAA,%ymm3,%ymm2,%ymm2 +vpblendd $0xAA,%ymm5,%ymm4,%ymm4 + +vmovdqa %ymm2,(%rdi) +vmovdqa %ymm4,32(%rdi) + +add $64,%rsi +add $64,%rdx +add $64,%rdi +add $1,%eax +cmp $16,%eax +jb _looptop2 + +ret diff --git a/crypto_sign/dilithium/dilithium5/avx2/poly.c b/crypto_sign/dilithium/dilithium5/avx2/poly.c new file mode 100644 index 00000000..e6bfd3c8 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/poly.c @@ -0,0 +1,1022 @@ +#include "align.h" +#include "consts.h" +#include "fips202x4.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "rejsample.h" +#include "rounding.h" +#include "symmetric.h" +#include +#include +#include + +#define DBENCH_START() +#define DBENCH_STOP(t) + +#define _mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_reduce(poly *a) { + unsigned int i; + __m256i f, g; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM5_AVX2_qdata.vec[_8XQ / 8]); + const __m256i off = _mm256_set1_epi32(1 << 22); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_add_epi32(f, off); + g = _mm256_srai_epi32(g, 23); + g = _mm256_mullo_epi32(g, q); + f = _mm256_sub_epi32(f, g); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_caddq(poly *a) { + unsigned int i; + __m256i f, g; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM5_AVX2_qdata.vec[_8XQ / 8]); + const __m256i zero = _mm256_setzero_si256(); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_blendv_epi32(zero, q, f); + f = _mm256_add_epi32(f, g); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_freeze +* +* Description: Inplace reduction of all coefficients of polynomial to +* positive standard representatives. Assumes input +* coefficients to be at most 2^31 - 2^22 - 1 in +* absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_freeze(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(a); + PQCLEAN_DILITHIUM5_AVX2_poly_caddq(a); + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_add +* +* Description: Add polynomials. No modular reduction is performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f, g; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_add_epi32(f, g); + _mm256_store_si256(&c->vec[i], f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_sub +* +* Description: Subtract polynomials. 
No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + __m256i f, g; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + g = _mm256_load_si256(&b->vec[i]); + f = _mm256_sub_epi32(f, g); + _mm256_store_si256(&c->vec[i], f); + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_shiftl(poly *a) { + unsigned int i; + __m256i f; + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_slli_epi32(f, D); + _mm256_store_si256(&a->vec[i], f); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by up to +* 8*Q in absolute value. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_ntt(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_ntt_avx(a->vec, PQCLEAN_DILITHIUM5_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. 
+* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_invntt_avx(a->vec, PQCLEAN_DILITHIUM5_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_nttunpack_avx(a->vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_pointwise_avx(c->vec, a->vec, b->vec, PQCLEAN_DILITHIUM5_AVX2_qdata.vec); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod^+ Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* positive standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_power2round_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod^+ Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except if c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be positive standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_decompose_avx(a1->vec, a0->vec, a->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_make_hint +* +* Description: Compute hint array. The coefficients of which are the +* indices of the coefficients of the input polynomial +* whose low bits overflow into the high bits. +* +* Arguments: - uint8_t *h: pointer to output hint array (preallocated of length N) +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of hints, i.e. length of hint array. 
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM5_AVX2_poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1) { + unsigned int r; + DBENCH_START(); + + r = PQCLEAN_DILITHIUM5_AVX2_make_hint_avx(hint, a0->vec, a1->vec); + + DBENCH_STOP(*tround); + return r; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_use_hint(poly *b, const poly *a, const poly *h) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_AVX2_use_hint_avx(b->vec, a->vec, h->vec); + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input polynomial to be reduced by PQCLEAN_DILITHIUM5_AVX2_poly_reduce(). +* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. 
+**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int r; + __m256i f, t; + const __m256i bound = _mm256_set1_epi32(B - 1); + DBENCH_START(); + + if (B > (Q - 1) / 8) { + return 1; + } + + t = _mm256_setzero_si256(); + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a->vec[i]); + f = _mm256_abs_epi32(f); + f = _mm256_cmpgt_epi32(f, bound); + t = _mm256_or_si256(t, f); + } + + r = 1 - _mm256_testz_si256(t, t); + DBENCH_STOP(*tsample); + return r; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + a[ctr++] = t; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream of SHAKE256(seed|nonce) or AES256CTR(seed,nonce). 
+* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_preinit(poly *a, stream128_state *state) { + unsigned int ctr; + /* PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx reads up to 8 additional bytes */ + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN + 8) buf; + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_NBLOCKS, state); + ctr = PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(a->coeffs, buf.coeffs); + + while (ctr < N) { + /* length of buf is always divisible by 3; hence, no bytes left */ + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + stream128_state state; + stream128_init(&state, seed, nonce); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_preinit(a, &state); + stream128_release(&state); +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_BUFLEN + 8) buf[4]; + keccakx4_state state; + __m256i f; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[SEEDBYTES + 0] = nonce0; + buf[0].coeffs[SEEDBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES + 0] = nonce1; + buf[1].coeffs[SEEDBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES + 0] = nonce2; + buf[2].coeffs[SEEDBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES + 0] = nonce3; + buf[3].coeffs[SEEDBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM5_AVX2_shake128x4_absorb_once(&state, 
buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_NBLOCKS, &state); + + ctr0 = PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(a0->coeffs, buf[0].coeffs); + ctr1 = PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(a1->coeffs, buf[1].coeffs); + ctr2 = PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(a2->coeffs, buf[2].coeffs); + ctr3 = PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(a3->coeffs, buf[3].coeffs); + + while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_uniform(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_uniform(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_uniform(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_uniform(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. 
+**************************************************/ +static unsigned int rej_eta(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t0, t1; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos < buflen) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if (t0 < 15) { + t0 = t0 - (205 * t0 >> 10) * 5; + a[ctr++] = 2 - t0; + } + if (t1 < 15 && ctr < len) { + t1 = t1 - (205 * t1 >> 10) * 5; + a[ctr++] = 2 - t1; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta +* +* Description: Sample polynomial with uniformly random coefficients +* in [-ETA,ETA] by performing rejection sampling using the +* output stream of SHAKE256(seed|nonce) +* or AES256CTR(seed,nonce). +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_preinit(poly *a, stream128_state *state) { + unsigned int ctr; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf; /* same per-stream buffer size that poly_uniform_eta_4x passes to rej_eta_avx */ + + stream128_squeezeblocks(buf.coeffs, REJ_UNIFORM_ETA_NBLOCKS, state); + ctr = PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(a->coeffs, buf.coeffs); + + while (ctr < N) { + stream128_squeezeblocks(buf.coeffs, 1, state); + ctr += rej_eta(a->coeffs + ctr, N - ctr, buf.coeffs, STREAM128_BLOCKBYTES); + } +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + stream128_state state; + stream128_init(&state, seed, nonce); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_preinit(a, &state); + stream128_release(&state); +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[32], + uint16_t nonce0, + uint16_t 
nonce1, + uint16_t nonce2, + uint16_t nonce3) { 
+ unsigned int ctr0, ctr1, ctr2, ctr3; + ALIGNED_UINT8(REJ_UNIFORM_ETA_BUFLEN) buf[4]; + + __m256i f; + keccakx4_state state; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + + buf[0].coeffs[SEEDBYTES + 0] = nonce0; + buf[0].coeffs[SEEDBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[SEEDBYTES + 0] = nonce1; + buf[1].coeffs[SEEDBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[SEEDBYTES + 0] = nonce2; + buf[2].coeffs[SEEDBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[SEEDBYTES + 0] = nonce3; + buf[3].coeffs[SEEDBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM5_AVX2_shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, SEEDBYTES + 2); + PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, REJ_UNIFORM_ETA_NBLOCKS, &state); + + ctr0 = PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(a0->coeffs, buf[0].coeffs); + ctr1 = PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(a1->coeffs, buf[1].coeffs); + ctr2 = PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(a2->coeffs, buf[2].coeffs); + ctr3 = PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(a3->coeffs, buf[3].coeffs); + + while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { + PQCLEAN_DILITHIUM5_AVX2_shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); + + ctr0 += rej_eta(a0->coeffs + ctr0, N - ctr0, buf[0].coeffs, SHAKE128_RATE); + ctr1 += rej_eta(a1->coeffs + ctr1, N - ctr1, buf[1].coeffs, SHAKE128_RATE); + ctr2 += rej_eta(a2->coeffs + ctr2, N - ctr2, buf[2].coeffs, SHAKE128_RATE); + ctr3 += rej_eta(a3->coeffs + ctr3, N - ctr3, buf[3].coeffs, SHAKE128_RATE); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1 +* +* Description: Sample polynomial with uniformly random coefficients +* in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream +* of 
SHAKE256(seed|nonce) or AES256CTR(seed,nonce). +* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length CRHBYTES +* - uint16_t nonce: 16-bit nonce +**************************************************/ +#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES+STREAM256_BLOCKBYTES-1)/STREAM256_BLOCKBYTES) +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_preinit(poly *a, stream256_state *state) { + /* PQCLEAN_DILITHIUM5_AVX2_polyz_unpack reads 14 additional bytes */ + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14) buf; + stream256_squeezeblocks(buf.coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, state); + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(a, buf.coeffs); +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce) { + stream256_state state; + stream256_init(&state, seed, nonce); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_preinit(a, &state); + stream256_release(&state); +} + +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[48], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3) { + ALIGNED_UINT8(POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES + 14) buf[4]; + keccakx4_state state; + __m256i f; + __m128i g; + + f = _mm256_loadu_si256((__m256i *)seed); + _mm256_store_si256(buf[0].vec, f); + _mm256_store_si256(buf[1].vec, f); + _mm256_store_si256(buf[2].vec, f); + _mm256_store_si256(buf[3].vec, f); + g = _mm_loadu_si128((__m128i *)&seed[32]); + _mm_store_si128((__m128i *)&buf[0].vec[1], g); + _mm_store_si128((__m128i *)&buf[1].vec[1], g); + _mm_store_si128((__m128i *)&buf[2].vec[1], g); + _mm_store_si128((__m128i *)&buf[3].vec[1], g); + + buf[0].coeffs[CRHBYTES + 0] = nonce0; + buf[0].coeffs[CRHBYTES + 1] = nonce0 >> 8; + buf[1].coeffs[CRHBYTES + 0] = nonce1; + buf[1].coeffs[CRHBYTES + 1] = nonce1 >> 8; + buf[2].coeffs[CRHBYTES + 0] = nonce2; + 
 buf[2].coeffs[CRHBYTES + 1] = nonce2 >> 8; + buf[3].coeffs[CRHBYTES + 0] = nonce3; + buf[3].coeffs[CRHBYTES + 1] = nonce3 >> 8; + + PQCLEAN_DILITHIUM5_AVX2_shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, CRHBYTES + 2); + PQCLEAN_DILITHIUM5_AVX2_shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, POLY_UNIFORM_GAMMA1_NBLOCKS, &state); + + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(a0, buf[0].coeffs); + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(a1, buf[1].coeffs); + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(a2, buf[2].coeffs); + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(a3, buf[3].coeffs); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_poly_challenge +* +* Description: Implementation of H. Samples polynomial with TAU nonzero +* coefficients in {-1,1} using the output stream of +* SHAKE256(seed). +* +* Arguments: - poly *c: pointer to output polynomial +* - const uint8_t seed[]: byte array containing seed of length SEEDBYTES +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_poly_challenge(poly *restrict c, const uint8_t seed[SEEDBYTES]) { + unsigned int i, b, pos; + uint64_t signs; + ALIGNED_UINT8(SHAKE256_RATE) buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, seed, SEEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + + memcpy(&signs, buf.coeffs, 8); + pos = 8; + + memset(c->vec, 0, sizeof(poly)); + for (i = N - TAU; i < N; ++i) { + do { + if (pos >= SHAKE256_RATE) { + shake256_inc_squeeze(buf.coeffs, SHAKE256_RATE, &state); + pos = 0; + } + + b = buf.coeffs[pos++]; + } while (b > i); + + c->coeffs[i] = c->coeffs[b]; + c->coeffs[b] = 1 - 2 * (signs & 1); + signs >>= 1; + } + shake256_inc_ctx_release(&state); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyeta_pack +* +* Description: Bit-pack polynomial with 
coefficients in [-ETA,ETA]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = ETA - a->coeffs[8 * i + 0]; + t[1] = ETA - a->coeffs[8 * i + 1]; + t[2] = ETA - a->coeffs[8 * i + 2]; + t[3] = ETA - a->coeffs[8 * i + 3]; + t[4] = ETA - a->coeffs[8 * i + 4]; + t[5] = ETA - a->coeffs[8 * i + 5]; + t[6] = ETA - a->coeffs[8 * i + 6]; + t[7] = ETA - a->coeffs[8 * i + 7]; + + r[3 * i + 0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); + r[3 * i + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); + r[3 * i + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyeta_unpack(poly *restrict r, const uint8_t a[POLYETA_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + r->coeffs[8 * i + 0] = (a[3 * i + 0] >> 0) & 7; + r->coeffs[8 * i + 1] = (a[3 * i + 0] >> 3) & 7; + r->coeffs[8 * i + 2] = ((a[3 * i + 0] >> 6) | (a[3 * i + 1] << 2)) & 7; + r->coeffs[8 * i + 3] = (a[3 * i + 1] >> 1) & 7; + r->coeffs[8 * i + 4] = (a[3 * i + 1] >> 4) & 7; + r->coeffs[8 * i + 5] = ((a[3 * i + 1] >> 7) | (a[3 * i + 2] << 1)) & 7; + r->coeffs[8 * i + 6] = (a[3 * i + 2] >> 2) & 7; + r->coeffs[8 * i + 7] = (a[3 * i + 2] >> 5) & 7; + + r->coeffs[8 * i + 0] = ETA - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = ETA - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = ETA - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = ETA - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = ETA - r->coeffs[8 * i + 4]; + r->coeffs[8 * i + 5] = ETA - r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = ETA - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = ETA - r->coeffs[8 * i + 7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r[5 * i + 0] = (a->coeffs[4 * i + 0] >> 0); + r[5 * i + 1] = (a->coeffs[4 * i + 0] >> 8) | (a->coeffs[4 * i + 1] << 2); + r[5 * i + 2] = (a->coeffs[4 * i + 1] >> 6) | (a->coeffs[4 * i + 2] << 4); + r[5 * i + 3] = (a->coeffs[4 * i + 2] >> 4) | (a->coeffs[4 * i + 3] << 6); + r[5 * i + 4] = (a->coeffs[4 * i + 3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are positive standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyt1_unpack(poly *restrict r, const uint8_t a[POLYT1_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r->coeffs[4 * i + 0] = ((a[5 * i + 0] >> 0) | ((uint32_t)a[5 * i + 1] << 8)) & 0x3FF; + r->coeffs[4 * i + 1] = ((a[5 * i + 1] >> 2) | ((uint32_t)a[5 * i + 2] << 6)) & 0x3FF; + r->coeffs[4 * i + 2] = ((a[5 * i + 2] >> 4) | ((uint32_t)a[5 * i + 3] << 4)) & 0x3FF; + r->coeffs[4 * i + 3] = ((a[5 * i + 3] >> 6) | ((uint32_t)a[5 * i + 4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = (1 << (D - 1)) - a->coeffs[8 * i + 0]; + t[1] = (1 << (D - 1)) - a->coeffs[8 * i + 1]; + t[2] = (1 << (D - 1)) - a->coeffs[8 * i + 2]; + t[3] = (1 << (D - 1)) - a->coeffs[8 * i + 3]; + t[4] = (1 << (D - 1)) - a->coeffs[8 * i + 4]; + t[5] = (1 << (D - 1)) - a->coeffs[8 * i + 5]; + t[6] = (1 << (D - 1)) - a->coeffs[8 * i + 6]; + t[7] = (1 << (D - 1)) - a->coeffs[8 * i + 7]; + + r[13 * i + 0] = t[0]; + r[13 * i + 1] = t[0] >> 8; + r[13 * i + 1] |= t[1] << 5; + r[13 * i + 2] = t[1] >> 3; + r[13 * i + 3] = t[1] >> 11; + r[13 * i + 3] |= t[2] << 2; + r[13 * i + 4] = t[2] >> 6; + r[13 * i + 4] |= t[3] << 7; + r[13 * i + 5] = t[3] >> 1; + r[13 * i + 6] = t[3] >> 9; + r[13 * i + 6] |= t[4] << 4; + r[13 * i + 7] = t[4] >> 4; + r[13 * i + 8] = t[4] >> 12; + r[13 * i + 8] |= t[5] << 1; + r[13 * i + 9] = t[5] >> 7; + r[13 * i + 9] |= t[6] << 6; + r[13 * i + 10] = t[6] >> 2; + r[13 * i + 11] = t[6] >> 10; + r[13 * i + 11] |= t[7] << 3; + r[13 * i + 12] = t[7] >> 5; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyt0_unpack(poly *restrict r, const uint8_t a[POLYT0_PACKEDBYTES]) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + r->coeffs[8 * i + 0] = a[13 * i + 0]; + r->coeffs[8 * i + 0] |= (uint32_t)a[13 * i + 1] << 8; + r->coeffs[8 * i + 0] &= 0x1FFF; + + r->coeffs[8 * i + 1] = a[13 * i + 1] >> 5; + r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 2] << 3; + r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 3] << 11; + r->coeffs[8 * i + 1] &= 0x1FFF; + + r->coeffs[8 * i + 2] = a[13 * i + 3] >> 2; + r->coeffs[8 * i + 2] |= (uint32_t)a[13 * i + 4] << 6; + r->coeffs[8 * i + 2] &= 0x1FFF; + + r->coeffs[8 * i + 3] = a[13 * i + 4] >> 7; + r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 5] << 1; + r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 6] << 9; + r->coeffs[8 * i + 3] &= 0x1FFF; + + r->coeffs[8 * i + 4] = a[13 * i + 6] >> 4; + r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 7] << 4; + r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 8] << 12; + r->coeffs[8 * i + 4] &= 0x1FFF; + + r->coeffs[8 * i + 5] = a[13 * i + 8] >> 1; + r->coeffs[8 * i + 5] |= (uint32_t)a[13 * i + 9] << 7; + r->coeffs[8 * i + 5] &= 0x1FFF; + + r->coeffs[8 * i + 6] = a[13 * i + 9] >> 6; + r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 10] << 2; + r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 11] << 10; + r->coeffs[8 * i + 6] &= 0x1FFF; + + r->coeffs[8 * i + 7] = a[13 * i + 11] >> 3; + r->coeffs[8 * i + 7] |= (uint32_t)a[13 * i + 12] << 5; + r->coeffs[8 * i + 7] &= 0x1FFF; + + r->coeffs[8 * i + 0] = (1 << (D - 1)) - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = (1 << (D - 1)) - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = (1 << (D - 1)) - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = (1 << (D - 1)) - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = (1 << (D - 1)) - r->coeffs[8 * i + 4]; + r->coeffs[8 * 
i + 5] = (1 << (D - 1)) - r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = (1 << (D - 1)) - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = (1 << (D - 1)) - r->coeffs[8 * i + 7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyz_pack +* +* Description: Bit-pack polynomial with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + + for (i = 0; i < N / 2; ++i) { + t[0] = GAMMA1 - a->coeffs[2 * i + 0]; + t[1] = GAMMA1 - a->coeffs[2 * i + 1]; + + r[5 * i + 0] = t[0]; + r[5 * i + 1] = t[0] >> 8; + r[5 * i + 2] = t[0] >> 16; + r[5 * i + 2] |= t[1] << 4; + r[5 * i + 3] = t[1] >> 4; + r[5 * i + 4] = t[1] >> 12; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(poly *restrict r, const uint8_t a[POLYZ_PACKEDBYTES + 12]) { + unsigned int i; + __m256i f; + const __m256i shufbidx = _mm256_set_epi8(-1, 11, 10, 9, -1, 9, 8, 7, -1, 6, 5, 4, -1, 4, 3, 2, + -1, 9, 8, 7, -1, 7, 6, 5, -1, 4, 3, 2, -1, 2, 1, 0); + const __m256i srlvdidx = _mm256_set1_epi64x((uint64_t)4 << 32); + const __m256i mask = _mm256_set1_epi32(0xFFFFF); + const __m256i gamma1 = _mm256_set1_epi32(GAMMA1); + DBENCH_START(); + + for (i = 0; i < N / 8; i++) { + f = _mm256_loadu_si256((__m256i *)&a[20 * i]); + f = _mm256_permute4x64_epi64(f, 0x94); + f = _mm256_shuffle_epi8(f, shufbidx); + f = _mm256_srlv_epi32(f, srlvdidx); + f = _mm256_and_si256(f, mask); + f = _mm256_sub_epi32(gamma1, f); + _mm256_store_si256(&r->vec[i], f); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyw1_pack +* +* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43]. +* Input coefficients are assumed to be positive standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYW1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +* (each iteration packs 64 coefficients into the +* 32 bytes stored at r[32 * i]) +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyw1_pack(uint8_t r[POLYW1_PACKEDBYTES], const poly *restrict a) { + unsigned int i; + __m256i f0, f1, f2, f3, f4, f5, f6, f7; + const __m256i shift = _mm256_set1_epi16((16 << 8) + 1); + const __m256i shufbidx = _mm256_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0, + 15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0); + DBENCH_START(); + + for (i = 0; i < N / 64; ++i) { + f0 = _mm256_load_si256(&a->vec[8 * i + 0]); + f1 = _mm256_load_si256(&a->vec[8 * i + 1]); + f2 = _mm256_load_si256(&a->vec[8 * i + 2]); + f3 = _mm256_load_si256(&a->vec[8 * i + 3]); + f4 = _mm256_load_si256(&a->vec[8 * i + 4]); + f5 = _mm256_load_si256(&a->vec[8 * i + 5]); + f6 = _mm256_load_si256(&a->vec[8 * i + 6]); + f7 = _mm256_load_si256(&a->vec[8 * i + 7]); + f0 = _mm256_packus_epi32(f0, f1); + f1 = _mm256_packus_epi32(f2, f3); + f2 = _mm256_packus_epi32(f4, f5); + f3 = _mm256_packus_epi32(f6, f7); + f0 = _mm256_packus_epi16(f0, f1); + f1 = _mm256_packus_epi16(f2, f3); + f0 = _mm256_maddubs_epi16(f0, shift); + f1 = _mm256_maddubs_epi16(f1, shift); + f0 = _mm256_packus_epi16(f0, f1); + f0 = _mm256_permute4x64_epi64(f0, 0xD8); + f0 = _mm256_shuffle_epi8(f0, shufbidx); + _mm256_storeu_si256((__m256i *)&r[32 * i], f0); + } + + DBENCH_STOP(*tpack); +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/poly.h b/crypto_sign/dilithium/dilithium5/avx2/poly.h new file mode 100644 index 00000000..0dc8c4ac --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/poly.h @@ -0,0 +1,79 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_POLY_H +#define PQCLEAN_DILITHIUM5_AVX2_POLY_H +#include "align.h" +#include "params.h" +#include "symmetric.h" +#include + +typedef ALIGNED_INT32(N) poly; + +void PQCLEAN_DILITHIUM5_AVX2_poly_reduce(poly *a); +void 
PQCLEAN_DILITHIUM5_AVX2_poly_caddq(poly *a); +void PQCLEAN_DILITHIUM5_AVX2_poly_freeze(poly *a); + +void PQCLEAN_DILITHIUM5_AVX2_poly_add(poly *c, const poly *a, const poly *b); +void PQCLEAN_DILITHIUM5_AVX2_poly_sub(poly *c, const poly *a, const poly *b); +void PQCLEAN_DILITHIUM5_AVX2_poly_shiftl(poly *a); + +void PQCLEAN_DILITHIUM5_AVX2_poly_ntt(poly *a); +void PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(poly *a); +void PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(poly *a); +void PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b); + +void PQCLEAN_DILITHIUM5_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a); +void PQCLEAN_DILITHIUM5_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a); +unsigned int PQCLEAN_DILITHIUM5_AVX2_poly_make_hint(uint8_t hint[N], const poly *a0, const poly *a1); +void PQCLEAN_DILITHIUM5_AVX2_poly_use_hint(poly *b, const poly *a, const poly *h); + +int PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(const poly *a, int32_t B); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_preinit(poly *a, stream128_state *state); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_preinit(poly *a, stream128_state *state); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta(poly *a, const uint8_t seed[SEEDBYTES], uint16_t nonce); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_preinit(poly *a, stream256_state *state); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce); +void PQCLEAN_DILITHIUM5_AVX2_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +/* 4x variants: four polynomial pointers, one shared seed, one nonce per polynomial. */ void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[SEEDBYTES], + uint16_t 
nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); +void PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_4x(poly *a0, + poly *a1, + poly *a2, + poly *a3, + const uint8_t seed[CRHBYTES], + uint16_t nonce0, + uint16_t nonce1, + uint16_t nonce2, + uint16_t nonce3); + +void PQCLEAN_DILITHIUM5_AVX2_polyeta_pack(uint8_t r[POLYETA_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM5_AVX2_polyeta_unpack(poly *r, const uint8_t a[POLYETA_PACKEDBYTES]); + +void PQCLEAN_DILITHIUM5_AVX2_polyt1_pack(uint8_t r[POLYT1_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM5_AVX2_polyt1_unpack(poly *r, const uint8_t a[POLYT1_PACKEDBYTES]); + +void PQCLEAN_DILITHIUM5_AVX2_polyt0_pack(uint8_t r[POLYT0_PACKEDBYTES], const poly *a); +void PQCLEAN_DILITHIUM5_AVX2_polyt0_unpack(poly *r, const uint8_t a[POLYT0_PACKEDBYTES]); + +void PQCLEAN_DILITHIUM5_AVX2_polyz_pack(uint8_t r[POLYZ_PACKEDBYTES], const poly *a); +/* Array bound kept identical to the definition in poly.c (POLYZ_PACKEDBYTES + 12). */ void PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(poly *r, const uint8_t a[POLYZ_PACKEDBYTES + 12]); + +/* Array bound kept identical to the definition in poly.c (POLYW1_PACKEDBYTES). */ void PQCLEAN_DILITHIUM5_AVX2_polyw1_pack(uint8_t r[POLYW1_PACKEDBYTES], const poly *a); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/polyvec.c b/crypto_sign/dilithium/dilithium5/avx2/polyvec.c new file mode 100644 index 00000000..de295523 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/polyvec.c @@ -0,0 +1,538 @@ +#include "consts.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include + +#define UNUSED(x) (void)(x) + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand +* +* Description: Implementation of ExpandA. Generates matrix A with uniformly +* random coefficients a_{i,j} by performing rejection +* sampling on the output stream of SHAKE128(rho|j|i) +* or AES256CTR(rho,j|i). 
+* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +* +* Each row helper fills the entries of rowa it is responsible for and the +* leading entries of rowb (the following row); rows 3 and 7 are called +* with rowb = NULL and write only to rowa. The nonce passed for entry +* (i, j) is 256 * i + j. +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row0(&mat[0], &mat[1], rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row1(&mat[1], &mat[2], rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row2(&mat[2], &mat[3], rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row3(&mat[3], NULL, rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row4(&mat[4], &mat[5], rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row5(&mat[5], &mat[6], rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row6(&mat[6], &mat[7], rho); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row7(&mat[7], NULL, rho); +} + +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 0, 1, 2, 3); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 4, 5, 6, 256); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[0]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[0]); +} + +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 257, 258, 259, 260); + 
PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 261, 262, 512, 513); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[0]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[1]); +} + +/* Fills rowa->vec[2..6] and rowb->vec[0..2] via poly_uniform_4x, then runs poly_nttunpack on each of them. */ void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 514, 515, 516, 517); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 518, 768, 769, 770); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[0]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[1]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[2]); +} + +/* rowb is unused here; only rowa->vec[3..6] are written. */ void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + UNUSED(rowb); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 771, 772, 773, 774); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); +} + +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row4(polyvecl *rowa, 
polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[0], &rowa->vec[1], &rowa->vec[2], &rowa->vec[3], rho, 1024, 1025, 1026, 1027); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[4], &rowa->vec[5], &rowa->vec[6], &rowb->vec[0], rho, 1028, 1029, 1030, 1280); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[0]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[0]); +} + +/* Fills rowa->vec[1..6] and rowb->vec[0..1] via poly_uniform_4x, then runs poly_nttunpack on each of them. */ void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[1], &rowa->vec[2], &rowa->vec[3], &rowa->vec[4], rho, 1281, 1282, 1283, 1284); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[5], &rowa->vec[6], &rowb->vec[0], &rowb->vec[1], rho, 1285, 1286, 1536, 1537); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[1]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[0]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[1]); +} + +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[2], &rowa->vec[3], &rowa->vec[4], &rowa->vec[5], rho, 1538, 1539, 1540, 1541); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[6], &rowb->vec[0], &rowb->vec[1], &rowb->vec[2], rho, 1542, 1792, 1793, 
1794); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[2]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[0]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[1]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowb->vec[2]); +} + +/* rowb is unused here; only rowa->vec[3..6] are written. */ void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]) { + UNUSED(rowb); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_4x(&rowa->vec[3], &rowa->vec[4], &rowa->vec[5], &rowa->vec[6], rho, 1795, 1796, 1797, 1798); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[3]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[4]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[5]); + PQCLEAN_DILITHIUM5_AVX2_poly_nttunpack(&rowa->vec[6]); +} + + +/* For each i < K, calls polyvecl_pointwise_acc_montgomery on row mat[i] and v, writing the result to t->vec[i]. */ void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); + } +} + +/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +/* Calls poly_uniform_eta for each of the L polynomials, passing nonce, nonce + 1, ... in turn. */ void PQCLEAN_DILITHIUM5_AVX2_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta(&v->vec[i], seed, nonce++); + } +} + +/* Calls poly_uniform_gamma1 for each of the L polynomials, passing L * nonce + i as its nonce. NOTE(review): seed is declared with SEEDBYTES here, while poly.h declares poly_uniform_gamma1 with seed[CRHBYTES] -- confirm which bound is intended and keep the prototype in polyvec.h in sync. */ void PQCLEAN_DILITHIUM5_AVX2_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1(&v->vec[i], seed, L * nonce + i); + } +} + +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_reduce(polyvecl *v) { + 
unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyvecl_freeze +* +* Description: Reduce coefficients of polynomials in vector of length L +* to standard representatives. +* Applies PQCLEAN_DILITHIUM5_AVX2_poly_freeze to each entry. +* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_freeze(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_freeze(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* Applies PQCLEAN_DILITHIUM5_AVX2_poly_add entry by entry. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_ntt(&v->vec[i]); + } +} + +/* Applies PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont to each of the L polynomials of v, in place. */ void PQCLEAN_DILITHIUM5_AVX2_polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&v->vec[i]); + } +} + +/* For each i < L, passes a and v->vec[i] to poly_pointwise_montgomery and stores the result in r->vec[i]. */ void PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. +* The work is delegated to PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx. +* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_acc_montgomery(poly *w, const polyvecl *u, const polyvecl *v) { + PQCLEAN_DILITHIUM5_AVX2_pointwise_acc_avx(w->vec, u->vec->vec, v->vec->vec, PQCLEAN_DILITHIUM5_AVX2_qdata.vec); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by PQCLEAN_DILITHIUM5_AVX2_polyvecl_reduce(). +* +* Arguments: - const polyvecl *v: pointer to vector +* - int32_t bound: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than bound <= (Q-1)/8 +* and 1 otherwise. 
+**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_polyvecl_chknorm(const polyvecl *v, int32_t bound) { + unsigned int i; + + for (i = 0; i < L; ++i) { + if (PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(&v->vec[i], bound)) { + return 1; + } + } + + return 0; +} + +/**************************************************************/ +/************ Vectors of polynomials of length K **************/ +/**************************************************************/ + +/* Calls poly_uniform_eta for each of the K polynomials, passing nonce, nonce + 1, ... in turn. */ void PQCLEAN_DILITHIUM5_AVX2_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta(&v->vec[i], seed, nonce++); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_reduce +* +* Description: Reduce coefficients of polynomials in vector of length K +* to representatives in [-6283009,6283007]. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_reduce(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_caddq +* +* Description: For all coefficients of polynomials in vector of length K +* add Q if coefficient is negative. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_caddq(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_caddq(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_freeze +* +* Description: Reduce coefficients of polynomials in vector of length K +* to standard representatives. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_freeze(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_freeze(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. +* Applies PQCLEAN_DILITHIUM5_AVX2_poly_add entry by entry. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. +* Applies PQCLEAN_DILITHIUM5_AVX2_poly_sub entry by entry. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_shiftl +* +* Description: Multiply vector of polynomials of length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. 
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_shiftl(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_shiftl(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_ntt(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_ntt(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_invntt_tomont(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&v->vec[i]); + } +} + +/* For each i < K, passes a and v->vec[i] to poly_pointwise_montgomery and stores the result in r->vec[i]. */ void PQCLEAN_DILITHIUM5_AVX2_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_chknorm +* +* Description: Check infinity norm of polynomials in vector of length K. +* Assumes input polyveck to be reduced by PQCLEAN_DILITHIUM5_AVX2_polyveck_reduce(). 
+* +* Arguments: - const polyveck *v: pointer to vector +* - int32_t bound: norm bound +* +* Returns 0 if norm of all polynomials is strictly smaller than bound <= (Q-1)/8 +* and 1 otherwise. +**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_polyveck_chknorm(const polyveck *v, int32_t bound) { + unsigned int i; + + for (i = 0; i < K; ++i) { + if (PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(&v->vec[i], bound)) { + return 1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_power2round +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute a0, a1 such that a mod^+ Q = a1*2^D + a0 +* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. +* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_decompose +* +* Description: For all coefficients a of polynomials in vector of length K, +* compute high and low bits a0, a1 such that a mod^+ Q = a1*ALPHA + a0 +* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we +* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. +* Assumes coefficients to be standard representatives. 
+* +* Arguments: - polyveck *v1: pointer to output vector of polynomials with +* coefficients a1 +* - polyveck *v0: pointer to output vector of polynomials with +* coefficients a0 +* - const polyveck *v: pointer to input vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_make_hint +* +* Description: Compute hint vector. +* +* Arguments: - uint8_t *hint: pointer to output hint array +* (polynomial i is handed &hint[n], where n is the +* sum of the values returned by the earlier +* poly_make_hint calls) +* - const polyveck *v0: pointer to low part of input vector +* - const polyveck *v1: pointer to high part of input vector +* +* Returns number of 1 bits. +**************************************************/ +unsigned int PQCLEAN_DILITHIUM5_AVX2_polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1) { + unsigned int i, n = 0; + + for (i = 0; i < K; ++i) { + n += PQCLEAN_DILITHIUM5_AVX2_poly_make_hint(&hint[n], &v0->vec[i], &v1->vec[i]); + } + + return n; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. 
+* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); + } +} + +/* Passes each of the K polynomials of w1 to polyw1_pack, with output pointer &r[i * POLYW1_PACKEDBYTES]. */ void PQCLEAN_DILITHIUM5_AVX2_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_AVX2_polyw1_pack(&r[i * POLYW1_PACKEDBYTES], &w1->vec[i]); + } +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/polyvec.h b/crypto_sign/dilithium/dilithium5/avx2/polyvec.h new file mode 100644 index 00000000..6213436b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/polyvec.h @@ -0,0 +1,72 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_POLYVEC_H +#define PQCLEAN_DILITHIUM5_AVX2_POLYVEC_H +#include "params.h" +#include "poly.h" +#include + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +/* NOTE(review): poly.h declares poly_uniform_gamma1 with seed[CRHBYTES]; confirm the SEEDBYTES bound used here and in polyvec.c. */ void PQCLEAN_DILITHIUM5_AVX2_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_reduce(polyvecl *v); + +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_freeze(polyvecl *v); + +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_ntt(polyvecl *v); +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_invntt_tomont(polyvecl *v); +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +void PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl 
*v); + +int PQCLEAN_DILITHIUM5_AVX2_polyvecl_chknorm(const polyvecl *v, int32_t B); + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +void PQCLEAN_DILITHIUM5_AVX2_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM5_AVX2_polyveck_reduce(polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_caddq(polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_freeze(polyveck *v); + +void PQCLEAN_DILITHIUM5_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_shiftl(polyveck *v); + +void PQCLEAN_DILITHIUM5_AVX2_polyveck_ntt(polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_invntt_tomont(polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +int PQCLEAN_DILITHIUM5_AVX2_polyveck_chknorm(const polyveck *v, int32_t B); + +void PQCLEAN_DILITHIUM5_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +unsigned int PQCLEAN_DILITHIUM5_AVX2_polyveck_make_hint(uint8_t *hint, const polyveck *v0, const polyveck *v1); +void PQCLEAN_DILITHIUM5_AVX2_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); + +void PQCLEAN_DILITHIUM5_AVX2_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1); + +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +/* Per-row helpers called by polyvec_matrix_expand; that caller passes rowb = NULL for rows 3 and 7. */ void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row0(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row1(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row2(polyvecl *rowa, polyvecl *rowb, const 
uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row3(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row4(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row5(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row6(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row7(polyvecl *rowa, polyvecl *rowb, const uint8_t rho[SEEDBYTES]); + +void PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/rejsample.c b/crypto_sign/dilithium/dilithium5/avx2/rejsample.c new file mode 100644 index 00000000..96159f34 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/rejsample.c @@ -0,0 +1,408 @@ +#include "params.h" +#include "rejsample.h" +#include "symmetric.h" +#include +#include + +const uint8_t PQCLEAN_DILITHIUM5_AVX2_idxlut[256][8] = { + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0}, + { 1, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 0, 0, 0, 0, 0, 0}, + { 2, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0}, + { 1, 2, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 0, 0, 0, 0, 0}, + { 3, 0, 0, 0, 0, 0, 0, 0}, + { 0, 3, 0, 0, 0, 0, 0, 0}, + { 1, 3, 0, 0, 0, 0, 0, 0}, + { 0, 1, 3, 0, 0, 0, 0, 0}, + { 2, 3, 0, 0, 0, 0, 0, 0}, + { 0, 2, 3, 0, 0, 0, 0, 0}, + { 1, 2, 3, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 0, 0, 0, 0}, + { 4, 0, 0, 0, 0, 0, 0, 0}, + { 0, 4, 0, 0, 0, 0, 0, 0}, + { 1, 4, 0, 0, 0, 0, 0, 0}, + { 0, 1, 4, 0, 0, 0, 0, 0}, + { 2, 4, 0, 0, 0, 0, 0, 0}, + { 0, 2, 4, 0, 0, 0, 0, 0}, + { 1, 2, 4, 0, 0, 0, 0, 0}, + { 0, 1, 2, 4, 0, 0, 0, 0}, + { 3, 4, 0, 0, 0, 0, 0, 0}, + { 0, 3, 4, 0, 0, 0, 0, 0}, + { 1, 3, 4, 0, 0, 0, 0, 0}, + { 0, 1, 3, 4, 0, 0, 0, 0}, + { 2, 3, 4, 0, 0, 0, 0, 0}, + 
{ 0, 2, 3, 4, 0, 0, 0, 0}, + { 1, 2, 3, 4, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 0, 0, 0}, + { 5, 0, 0, 0, 0, 0, 0, 0}, + { 0, 5, 0, 0, 0, 0, 0, 0}, + { 1, 5, 0, 0, 0, 0, 0, 0}, + { 0, 1, 5, 0, 0, 0, 0, 0}, + { 2, 5, 0, 0, 0, 0, 0, 0}, + { 0, 2, 5, 0, 0, 0, 0, 0}, + { 1, 2, 5, 0, 0, 0, 0, 0}, + { 0, 1, 2, 5, 0, 0, 0, 0}, + { 3, 5, 0, 0, 0, 0, 0, 0}, + { 0, 3, 5, 0, 0, 0, 0, 0}, + { 1, 3, 5, 0, 0, 0, 0, 0}, + { 0, 1, 3, 5, 0, 0, 0, 0}, + { 2, 3, 5, 0, 0, 0, 0, 0}, + { 0, 2, 3, 5, 0, 0, 0, 0}, + { 1, 2, 3, 5, 0, 0, 0, 0}, + { 0, 1, 2, 3, 5, 0, 0, 0}, + { 4, 5, 0, 0, 0, 0, 0, 0}, + { 0, 4, 5, 0, 0, 0, 0, 0}, + { 1, 4, 5, 0, 0, 0, 0, 0}, + { 0, 1, 4, 5, 0, 0, 0, 0}, + { 2, 4, 5, 0, 0, 0, 0, 0}, + { 0, 2, 4, 5, 0, 0, 0, 0}, + { 1, 2, 4, 5, 0, 0, 0, 0}, + { 0, 1, 2, 4, 5, 0, 0, 0}, + { 3, 4, 5, 0, 0, 0, 0, 0}, + { 0, 3, 4, 5, 0, 0, 0, 0}, + { 1, 3, 4, 5, 0, 0, 0, 0}, + { 0, 1, 3, 4, 5, 0, 0, 0}, + { 2, 3, 4, 5, 0, 0, 0, 0}, + { 0, 2, 3, 4, 5, 0, 0, 0}, + { 1, 2, 3, 4, 5, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 0, 0}, + { 6, 0, 0, 0, 0, 0, 0, 0}, + { 0, 6, 0, 0, 0, 0, 0, 0}, + { 1, 6, 0, 0, 0, 0, 0, 0}, + { 0, 1, 6, 0, 0, 0, 0, 0}, + { 2, 6, 0, 0, 0, 0, 0, 0}, + { 0, 2, 6, 0, 0, 0, 0, 0}, + { 1, 2, 6, 0, 0, 0, 0, 0}, + { 0, 1, 2, 6, 0, 0, 0, 0}, + { 3, 6, 0, 0, 0, 0, 0, 0}, + { 0, 3, 6, 0, 0, 0, 0, 0}, + { 1, 3, 6, 0, 0, 0, 0, 0}, + { 0, 1, 3, 6, 0, 0, 0, 0}, + { 2, 3, 6, 0, 0, 0, 0, 0}, + { 0, 2, 3, 6, 0, 0, 0, 0}, + { 1, 2, 3, 6, 0, 0, 0, 0}, + { 0, 1, 2, 3, 6, 0, 0, 0}, + { 4, 6, 0, 0, 0, 0, 0, 0}, + { 0, 4, 6, 0, 0, 0, 0, 0}, + { 1, 4, 6, 0, 0, 0, 0, 0}, + { 0, 1, 4, 6, 0, 0, 0, 0}, + { 2, 4, 6, 0, 0, 0, 0, 0}, + { 0, 2, 4, 6, 0, 0, 0, 0}, + { 1, 2, 4, 6, 0, 0, 0, 0}, + { 0, 1, 2, 4, 6, 0, 0, 0}, + { 3, 4, 6, 0, 0, 0, 0, 0}, + { 0, 3, 4, 6, 0, 0, 0, 0}, + { 1, 3, 4, 6, 0, 0, 0, 0}, + { 0, 1, 3, 4, 6, 0, 0, 0}, + { 2, 3, 4, 6, 0, 0, 0, 0}, + { 0, 2, 3, 4, 6, 0, 0, 0}, + { 1, 2, 3, 4, 6, 0, 0, 0}, + { 0, 1, 2, 3, 4, 6, 0, 0}, + { 5, 6, 0, 0, 0, 0, 0, 0}, + { 0, 5, 6, 0, 0, 0, 0, 0}, 
+ { 1, 5, 6, 0, 0, 0, 0, 0}, + { 0, 1, 5, 6, 0, 0, 0, 0}, + { 2, 5, 6, 0, 0, 0, 0, 0}, + { 0, 2, 5, 6, 0, 0, 0, 0}, + { 1, 2, 5, 6, 0, 0, 0, 0}, + { 0, 1, 2, 5, 6, 0, 0, 0}, + { 3, 5, 6, 0, 0, 0, 0, 0}, + { 0, 3, 5, 6, 0, 0, 0, 0}, + { 1, 3, 5, 6, 0, 0, 0, 0}, + { 0, 1, 3, 5, 6, 0, 0, 0}, + { 2, 3, 5, 6, 0, 0, 0, 0}, + { 0, 2, 3, 5, 6, 0, 0, 0}, + { 1, 2, 3, 5, 6, 0, 0, 0}, + { 0, 1, 2, 3, 5, 6, 0, 0}, + { 4, 5, 6, 0, 0, 0, 0, 0}, + { 0, 4, 5, 6, 0, 0, 0, 0}, + { 1, 4, 5, 6, 0, 0, 0, 0}, + { 0, 1, 4, 5, 6, 0, 0, 0}, + { 2, 4, 5, 6, 0, 0, 0, 0}, + { 0, 2, 4, 5, 6, 0, 0, 0}, + { 1, 2, 4, 5, 6, 0, 0, 0}, + { 0, 1, 2, 4, 5, 6, 0, 0}, + { 3, 4, 5, 6, 0, 0, 0, 0}, + { 0, 3, 4, 5, 6, 0, 0, 0}, + { 1, 3, 4, 5, 6, 0, 0, 0}, + { 0, 1, 3, 4, 5, 6, 0, 0}, + { 2, 3, 4, 5, 6, 0, 0, 0}, + { 0, 2, 3, 4, 5, 6, 0, 0}, + { 1, 2, 3, 4, 5, 6, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 0}, + { 7, 0, 0, 0, 0, 0, 0, 0}, + { 0, 7, 0, 0, 0, 0, 0, 0}, + { 1, 7, 0, 0, 0, 0, 0, 0}, + { 0, 1, 7, 0, 0, 0, 0, 0}, + { 2, 7, 0, 0, 0, 0, 0, 0}, + { 0, 2, 7, 0, 0, 0, 0, 0}, + { 1, 2, 7, 0, 0, 0, 0, 0}, + { 0, 1, 2, 7, 0, 0, 0, 0}, + { 3, 7, 0, 0, 0, 0, 0, 0}, + { 0, 3, 7, 0, 0, 0, 0, 0}, + { 1, 3, 7, 0, 0, 0, 0, 0}, + { 0, 1, 3, 7, 0, 0, 0, 0}, + { 2, 3, 7, 0, 0, 0, 0, 0}, + { 0, 2, 3, 7, 0, 0, 0, 0}, + { 1, 2, 3, 7, 0, 0, 0, 0}, + { 0, 1, 2, 3, 7, 0, 0, 0}, + { 4, 7, 0, 0, 0, 0, 0, 0}, + { 0, 4, 7, 0, 0, 0, 0, 0}, + { 1, 4, 7, 0, 0, 0, 0, 0}, + { 0, 1, 4, 7, 0, 0, 0, 0}, + { 2, 4, 7, 0, 0, 0, 0, 0}, + { 0, 2, 4, 7, 0, 0, 0, 0}, + { 1, 2, 4, 7, 0, 0, 0, 0}, + { 0, 1, 2, 4, 7, 0, 0, 0}, + { 3, 4, 7, 0, 0, 0, 0, 0}, + { 0, 3, 4, 7, 0, 0, 0, 0}, + { 1, 3, 4, 7, 0, 0, 0, 0}, + { 0, 1, 3, 4, 7, 0, 0, 0}, + { 2, 3, 4, 7, 0, 0, 0, 0}, + { 0, 2, 3, 4, 7, 0, 0, 0}, + { 1, 2, 3, 4, 7, 0, 0, 0}, + { 0, 1, 2, 3, 4, 7, 0, 0}, + { 5, 7, 0, 0, 0, 0, 0, 0}, + { 0, 5, 7, 0, 0, 0, 0, 0}, + { 1, 5, 7, 0, 0, 0, 0, 0}, + { 0, 1, 5, 7, 0, 0, 0, 0}, + { 2, 5, 7, 0, 0, 0, 0, 0}, + { 0, 2, 5, 7, 0, 0, 0, 0}, + { 1, 2, 5, 7, 0, 0, 0, 
0}, + { 0, 1, 2, 5, 7, 0, 0, 0}, + { 3, 5, 7, 0, 0, 0, 0, 0}, + { 0, 3, 5, 7, 0, 0, 0, 0}, + { 1, 3, 5, 7, 0, 0, 0, 0}, + { 0, 1, 3, 5, 7, 0, 0, 0}, + { 2, 3, 5, 7, 0, 0, 0, 0}, + { 0, 2, 3, 5, 7, 0, 0, 0}, + { 1, 2, 3, 5, 7, 0, 0, 0}, + { 0, 1, 2, 3, 5, 7, 0, 0}, + { 4, 5, 7, 0, 0, 0, 0, 0}, + { 0, 4, 5, 7, 0, 0, 0, 0}, + { 1, 4, 5, 7, 0, 0, 0, 0}, + { 0, 1, 4, 5, 7, 0, 0, 0}, + { 2, 4, 5, 7, 0, 0, 0, 0}, + { 0, 2, 4, 5, 7, 0, 0, 0}, + { 1, 2, 4, 5, 7, 0, 0, 0}, + { 0, 1, 2, 4, 5, 7, 0, 0}, + { 3, 4, 5, 7, 0, 0, 0, 0}, + { 0, 3, 4, 5, 7, 0, 0, 0}, + { 1, 3, 4, 5, 7, 0, 0, 0}, + { 0, 1, 3, 4, 5, 7, 0, 0}, + { 2, 3, 4, 5, 7, 0, 0, 0}, + { 0, 2, 3, 4, 5, 7, 0, 0}, + { 1, 2, 3, 4, 5, 7, 0, 0}, + { 0, 1, 2, 3, 4, 5, 7, 0}, + { 6, 7, 0, 0, 0, 0, 0, 0}, + { 0, 6, 7, 0, 0, 0, 0, 0}, + { 1, 6, 7, 0, 0, 0, 0, 0}, + { 0, 1, 6, 7, 0, 0, 0, 0}, + { 2, 6, 7, 0, 0, 0, 0, 0}, + { 0, 2, 6, 7, 0, 0, 0, 0}, + { 1, 2, 6, 7, 0, 0, 0, 0}, + { 0, 1, 2, 6, 7, 0, 0, 0}, + { 3, 6, 7, 0, 0, 0, 0, 0}, + { 0, 3, 6, 7, 0, 0, 0, 0}, + { 1, 3, 6, 7, 0, 0, 0, 0}, + { 0, 1, 3, 6, 7, 0, 0, 0}, + { 2, 3, 6, 7, 0, 0, 0, 0}, + { 0, 2, 3, 6, 7, 0, 0, 0}, + { 1, 2, 3, 6, 7, 0, 0, 0}, + { 0, 1, 2, 3, 6, 7, 0, 0}, + { 4, 6, 7, 0, 0, 0, 0, 0}, + { 0, 4, 6, 7, 0, 0, 0, 0}, + { 1, 4, 6, 7, 0, 0, 0, 0}, + { 0, 1, 4, 6, 7, 0, 0, 0}, + { 2, 4, 6, 7, 0, 0, 0, 0}, + { 0, 2, 4, 6, 7, 0, 0, 0}, + { 1, 2, 4, 6, 7, 0, 0, 0}, + { 0, 1, 2, 4, 6, 7, 0, 0}, + { 3, 4, 6, 7, 0, 0, 0, 0}, + { 0, 3, 4, 6, 7, 0, 0, 0}, + { 1, 3, 4, 6, 7, 0, 0, 0}, + { 0, 1, 3, 4, 6, 7, 0, 0}, + { 2, 3, 4, 6, 7, 0, 0, 0}, + { 0, 2, 3, 4, 6, 7, 0, 0}, + { 1, 2, 3, 4, 6, 7, 0, 0}, + { 0, 1, 2, 3, 4, 6, 7, 0}, + { 5, 6, 7, 0, 0, 0, 0, 0}, + { 0, 5, 6, 7, 0, 0, 0, 0}, + { 1, 5, 6, 7, 0, 0, 0, 0}, + { 0, 1, 5, 6, 7, 0, 0, 0}, + { 2, 5, 6, 7, 0, 0, 0, 0}, + { 0, 2, 5, 6, 7, 0, 0, 0}, + { 1, 2, 5, 6, 7, 0, 0, 0}, + { 0, 1, 2, 5, 6, 7, 0, 0}, + { 3, 5, 6, 7, 0, 0, 0, 0}, + { 0, 3, 5, 6, 7, 0, 0, 0}, + { 1, 3, 5, 6, 7, 0, 0, 0}, + { 0, 1, 3, 5, 6, 7, 
0, 0}, + { 2, 3, 5, 6, 7, 0, 0, 0}, + { 0, 2, 3, 5, 6, 7, 0, 0}, + { 1, 2, 3, 5, 6, 7, 0, 0}, + { 0, 1, 2, 3, 5, 6, 7, 0}, + { 4, 5, 6, 7, 0, 0, 0, 0}, + { 0, 4, 5, 6, 7, 0, 0, 0}, + { 1, 4, 5, 6, 7, 0, 0, 0}, + { 0, 1, 4, 5, 6, 7, 0, 0}, + { 2, 4, 5, 6, 7, 0, 0, 0}, + { 0, 2, 4, 5, 6, 7, 0, 0}, + { 1, 2, 4, 5, 6, 7, 0, 0}, + { 0, 1, 2, 4, 5, 6, 7, 0}, + { 3, 4, 5, 6, 7, 0, 0, 0}, + { 0, 3, 4, 5, 6, 7, 0, 0}, + { 1, 3, 4, 5, 6, 7, 0, 0}, + { 0, 1, 3, 4, 5, 6, 7, 0}, + { 2, 3, 4, 5, 6, 7, 0, 0}, + { 0, 2, 3, 4, 5, 6, 7, 0}, + { 1, 2, 3, 4, 5, 6, 7, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7} +}; + +unsigned int PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(int32_t *restrict r, const uint8_t buf[REJ_UNIFORM_BUFLEN + 8]) { + unsigned int ctr, pos; + uint32_t good; + __m256i d, tmp; + const __m256i bound = _mm256_set1_epi32(Q); + const __m256i mask = _mm256_set1_epi32(0x7FFFFF); + const __m256i idx8 = _mm256_set_epi8(-1, 15, 14, 13, -1, 12, 11, 10, + -1, 9, 8, 7, -1, 6, 5, 4, + -1, 11, 10, 9, -1, 8, 7, 6, + -1, 5, 4, 3, -1, 2, 1, 0); + + ctr = pos = 0; + while (pos <= REJ_UNIFORM_BUFLEN - 24) { + d = _mm256_loadu_si256((__m256i *)&buf[pos]); + d = _mm256_permute4x64_epi64(d, 0x94); + d = _mm256_shuffle_epi8(d, idx8); + d = _mm256_and_si256(d, mask); + pos += 24; + + tmp = _mm256_sub_epi32(d, bound); + good = _mm256_movemask_ps((__m256)tmp); + tmp = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM5_AVX2_idxlut[good])); + d = _mm256_permutevar8x32_epi32(d, tmp); + + _mm256_storeu_si256((__m256i *)&r[ctr], d); + ctr += _mm_popcnt_u32(good); + + if (ctr > N - 8) { + break; + } + } + + uint32_t t; + while (ctr < N && pos <= REJ_UNIFORM_BUFLEN - 3) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + r[ctr++] = t; + } + } + + return ctr; +} + +unsigned int PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(int32_t *restrict r, const uint8_t buf[REJ_UNIFORM_ETA_BUFLEN]) { + unsigned int ctr, pos; + uint32_t good; + 
__m256i f0, f1, f2; + __m128i g0, g1; + const __m256i mask = _mm256_set1_epi8(15); + const __m256i eta = _mm256_set1_epi8(ETA); + const __m256i bound = mask; + const __m256i v = _mm256_set1_epi32(-6560); + const __m256i p = _mm256_set1_epi32(5); + + ctr = pos = 0; + while (ctr <= N - 8 && pos <= REJ_UNIFORM_ETA_BUFLEN - 16) { + f0 = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i *)&buf[pos])); + f1 = _mm256_slli_epi16(f0, 4); + f0 = _mm256_or_si256(f0, f1); + f0 = _mm256_and_si256(f0, mask); + + f1 = _mm256_sub_epi8(f0, bound); + f0 = _mm256_sub_epi8(eta, f0); + good = _mm256_movemask_epi8(f1); + + g0 = _mm256_castsi256_si128(f0); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM5_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm_bsrli_si128(g0, 8); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM5_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm256_extracti128_si256(f0, 1); + g1 = _mm_loadl_epi64((__m128i *)&PQCLEAN_DILITHIUM5_AVX2_idxlut[good & 0xFF]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good & 0xFF); + good >>= 8; + pos += 4; + + if (ctr > N - 8) { + break; + } + g0 = _mm_bsrli_si128(g0, 8); + g1 = _mm_loadl_epi64((__m128i 
*)&PQCLEAN_DILITHIUM5_AVX2_idxlut[good]); + g1 = _mm_shuffle_epi8(g0, g1); + f1 = _mm256_cvtepi8_epi32(g1); + f2 = _mm256_mulhrs_epi16(f1, v); + f2 = _mm256_mullo_epi16(f2, p); + f1 = _mm256_add_epi32(f1, f2); + _mm256_storeu_si256((__m256i *)&r[ctr], f1); + ctr += _mm_popcnt_u32(good); + pos += 4; + } + + uint32_t t0, t1; + while (ctr < N && pos < REJ_UNIFORM_ETA_BUFLEN) { + t0 = buf[pos] & 0x0F; + t1 = buf[pos++] >> 4; + + if (t0 < 15) { + t0 = t0 - (205 * t0 >> 10) * 5; + r[ctr++] = 2 - t0; + } + if (t1 < 15 && ctr < N) { + t1 = t1 - (205 * t1 >> 10) * 5; + r[ctr++] = 2 - t1; + } + } + + return ctr; +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/rejsample.h b/crypto_sign/dilithium/dilithium5/avx2/rejsample.h new file mode 100644 index 00000000..78786bf8 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/rejsample.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_REJSAMPLE_H +#define PQCLEAN_DILITHIUM5_AVX2_REJSAMPLE_H +#include "params.h" +#include "symmetric.h" +#include + +#define REJ_UNIFORM_NBLOCKS ((768+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_BUFLEN (REJ_UNIFORM_NBLOCKS*STREAM128_BLOCKBYTES) + +#define REJ_UNIFORM_ETA_NBLOCKS ((137+STREAM128_BLOCKBYTES-1)/STREAM128_BLOCKBYTES) +#define REJ_UNIFORM_ETA_BUFLEN (REJ_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) + +extern const uint8_t PQCLEAN_DILITHIUM5_AVX2_idxlut[256][8]; + +unsigned int PQCLEAN_DILITHIUM5_AVX2_rej_uniform_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN + 8]); + +unsigned int PQCLEAN_DILITHIUM5_AVX2_rej_eta_avx(int32_t *r, const uint8_t buf[REJ_UNIFORM_BUFLEN]); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/rounding.c b/crypto_sign/dilithium/dilithium5/avx2/rounding.c new file mode 100644 index 00000000..ad1c7128 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/rounding.c @@ -0,0 +1,154 @@ +#include "consts.h" +#include "params.h" +#include "rejsample.h" +#include "rounding.h" +#include +#include +#include + +#define 
_mm256_blendv_epi32(a,b,mask) \ + _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a), \ + _mm256_castsi256_ps(b), \ + _mm256_castsi256_ps(mask))) + +/************************************************* +* Name: power2round +* +* Description: For finite field elements a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be positive standard representative. +* +* Arguments: - __m256i *a1: output array of length N/8 with high bits +* - __m256i *a0: output array of length N/8 with low bits a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a) { + unsigned int i; + __m256i f, f0, f1; + const __m256i mask = _mm256_set1_epi32(-(1 << D)); + const __m256i half = _mm256_set1_epi32((1 << (D - 1)) - 1); + + for (i = 0; i < N / 8; ++i) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f, half); + f0 = _mm256_and_si256(f1, mask); + f1 = _mm256_srli_epi32(f1, D); + f0 = _mm256_sub_epi32(f, f0); + _mm256_store_si256(&a1[i], f1); + _mm256_store_si256(&a0[i], f0); + } +} + +/************************************************* +* Name: decompose +* +* Description: For finite field element a, compute high and low parts a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be positive standard +* representative. 
+* +* Arguments: - __m256i *a1: output array of length N/8 with high parts +* - __m256i *a0: output array of length N/8 with low parts a0 +* - const __m256i *a: input array of length N/8 +* +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a) { + unsigned int i; + __m256i f, f0, f1; + const __m256i q = _mm256_load_si256(&PQCLEAN_DILITHIUM5_AVX2_qdata.vec[_8XQ / 8]); + const __m256i hq = _mm256_srli_epi32(q, 1); + const __m256i v = _mm256_set1_epi32(1025); + const __m256i alpha = _mm256_set1_epi32(2 * GAMMA2); + const __m256i off = _mm256_set1_epi32(127); + const __m256i shift = _mm256_set1_epi32(512); + const __m256i mask = _mm256_set1_epi32(15); + + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a[i]); + f1 = _mm256_add_epi32(f, off); + f1 = _mm256_srli_epi32(f1, 7); + f1 = _mm256_mulhi_epu16(f1, v); + f1 = _mm256_mulhrs_epi16(f1, shift); + f1 = _mm256_and_si256(f1, mask); + f0 = _mm256_mullo_epi32(f1, alpha); + f0 = _mm256_sub_epi32(f, f0); + f = _mm256_cmpgt_epi32(f0, hq); + f = _mm256_and_si256(f, q); + f0 = _mm256_sub_epi32(f0, f); + _mm256_store_si256(&a1[i], f1); + _mm256_store_si256(&a0[i], f0); + } +} + + +/************************************************* +* Name: make_hint +* +* Description: Compute indices of polynomial coefficients whose low bits +* overflow into the high bits. 
+* +* Arguments: - uint8_t *hint: hint array +* - const __m256i *a0: low bits of input elements +* - const __m256i *a1: high bits of input elements +* +* Returns number of overflowing low bits +**************************************************/ +unsigned int PQCLEAN_DILITHIUM5_AVX2_make_hint_avx(uint8_t hint[N], const __m256i *restrict a0, const __m256i *restrict a1) { + unsigned int i, n = 0; + __m256i f0, f1, g0, g1; + uint32_t bad; + uint64_t idx; + const __m256i low = _mm256_set1_epi32(-GAMMA2); + const __m256i high = _mm256_set1_epi32(GAMMA2); + + for (i = 0; i < N / 8; ++i) { + f0 = _mm256_load_si256(&a0[i]); + f1 = _mm256_load_si256(&a1[i]); + g0 = _mm256_abs_epi32(f0); + g0 = _mm256_cmpgt_epi32(g0, high); + g1 = _mm256_cmpeq_epi32(f0, low); + g1 = _mm256_sign_epi32(g1, f1); + g0 = _mm256_or_si256(g0, g1); + + bad = _mm256_movemask_ps((__m256)g0); + memcpy(&idx, PQCLEAN_DILITHIUM5_AVX2_idxlut[bad], 8); + idx += (uint64_t)0x0808080808080808 * i; + memcpy(&hint[n], &idx, 8); + n += _mm_popcnt_u32(bad); + } + + return n; +} + +/************************************************* +* Name: use_hint +* +* Description: Correct high parts according to hint. 
+* +* Arguments: - __m256i *b: output array of length N/8 with corrected high parts +* - const __m256i *a: input array of length N/8 +* - const __m256i *a: input array of length N/8 with hint bits +* +**************************************************/ +void PQCLEAN_DILITHIUM5_AVX2_use_hint_avx(__m256i *b, const __m256i *a, const __m256i *restrict hint) { + unsigned int i; + __m256i a0[N / 8]; + __m256i f, g, h, t; + const __m256i zero = _mm256_setzero_si256(); + const __m256i mask = _mm256_set1_epi32(15); + + PQCLEAN_DILITHIUM5_AVX2_decompose_avx(b, a0, a); + for (i = 0; i < N / 8; i++) { + f = _mm256_load_si256(&a0[i]); + g = _mm256_load_si256(&b[i]); + h = _mm256_load_si256(&hint[i]); + t = _mm256_blendv_epi32(zero, h, f); + t = _mm256_slli_epi32(t, 1); + h = _mm256_sub_epi32(h, t); + g = _mm256_add_epi32(g, h); + g = _mm256_and_si256(g, mask); + _mm256_store_si256(&b[i], g); + } +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/rounding.h b/crypto_sign/dilithium/dilithium5/avx2/rounding.h new file mode 100644 index 00000000..c3483f1e --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/rounding.h @@ -0,0 +1,12 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_ROUNDING_H +#define PQCLEAN_DILITHIUM5_AVX2_ROUNDING_H +#include "params.h" +#include +#include + +void PQCLEAN_DILITHIUM5_AVX2_power2round_avx(__m256i *a1, __m256i *a0, const __m256i *a); +void PQCLEAN_DILITHIUM5_AVX2_decompose_avx(__m256i *a1, __m256i *a0, const __m256i *a); +unsigned int PQCLEAN_DILITHIUM5_AVX2_make_hint_avx(uint8_t hint[N], const __m256i *a0, const __m256i *a1); +void PQCLEAN_DILITHIUM5_AVX2_use_hint_avx(__m256i *b, const __m256i *a, const __m256i *hint); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/shuffle.S b/crypto_sign/dilithium/dilithium5/avx2/shuffle.S new file mode 100644 index 00000000..ab186107 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/shuffle.S @@ -0,0 +1,54 @@ +#include "cdecl.h" +.include "shuffle.inc" + +.text +nttunpack128_avx: +#load +vmovdqa 
(%rdi),%ymm4 +vmovdqa 32(%rdi),%ymm5 +vmovdqa 64(%rdi),%ymm6 +vmovdqa 96(%rdi),%ymm7 +vmovdqa 128(%rdi),%ymm8 +vmovdqa 160(%rdi),%ymm9 +vmovdqa 192(%rdi),%ymm10 +vmovdqa 224(%rdi),%ymm11 + +shuffle8 4,8,3,8 +shuffle8 5,9,4,9 +shuffle8 6,10,5,10 +shuffle8 7,11,6,11 + +shuffle4 3,5,7,5 +shuffle4 8,10,3,10 +shuffle4 4,6,8,6 +shuffle4 9,11,4,11 + +shuffle2 7,8,9,8 +shuffle2 5,6,7,6 +shuffle2 3,4,5,4 +shuffle2 10,11,3,11 + +#store +vmovdqa %ymm9,(%rdi) +vmovdqa %ymm8,32(%rdi) +vmovdqa %ymm7,64(%rdi) +vmovdqa %ymm6,96(%rdi) +vmovdqa %ymm5,128(%rdi) +vmovdqa %ymm4,160(%rdi) +vmovdqa %ymm3,192(%rdi) +vmovdqa %ymm11,224(%rdi) + +ret + +.global cdecl(PQCLEAN_DILITHIUM5_AVX2_nttunpack_avx) +.global _cdecl(PQCLEAN_DILITHIUM5_AVX2_nttunpack_avx) +cdecl(PQCLEAN_DILITHIUM5_AVX2_nttunpack_avx): +_cdecl(PQCLEAN_DILITHIUM5_AVX2_nttunpack_avx): +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +add $256,%rdi +call nttunpack128_avx +ret diff --git a/crypto_sign/dilithium/dilithium5/avx2/shuffle.inc b/crypto_sign/dilithium/dilithium5/avx2/shuffle.inc new file mode 100644 index 00000000..73e9ffe0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/shuffle.inc @@ -0,0 +1,25 @@ +.macro shuffle8 r0,r1,r2,r3 +vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 +vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle4 r0,r1,r2,r3 +vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 +vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle2 r0,r1,r2,r3 +#vpsllq $32,%ymm\r1,%ymm\r2 +vmovsldup %ymm\r1,%ymm\r2 +vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrlq $32,%ymm\r0,%ymm\r0 +#vmovshdup %ymm\r0,%ymm\r0 +vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm + +.macro shuffle1 r0,r1,r2,r3 +vpslld $16,%ymm\r1,%ymm\r2 +vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 +vpsrld $16,%ymm\r0,%ymm\r0 +vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 +.endm diff --git a/crypto_sign/dilithium/dilithium5/avx2/sign.c b/crypto_sign/dilithium/dilithium5/avx2/sign.c new file mode 100644 index 
00000000..050a5371 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/sign.c @@ -0,0 +1,435 @@ +#include "align.h" +#include "fips202.h" +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "sign.h" +#include "symmetric.h" +#include +#include + +static inline void polyvec_matrix_expand_row(polyvecl **row, polyvecl buf[2], const uint8_t rho[SEEDBYTES], unsigned int i) { + switch (i) { + case 0: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row0(buf, buf + 1, rho); + *row = buf; + break; + case 1: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row1(buf + 1, buf, rho); + *row = buf + 1; + break; + case 2: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row2(buf, buf + 1, rho); + *row = buf; + break; + case 3: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row3(buf + 1, buf, rho); + *row = buf + 1; + break; + case 4: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row4(buf, buf + 1, rho); + *row = buf; + break; + case 5: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row5(buf + 1, buf, rho); + *row = buf + 1; + break; + case 6: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row6(buf, buf + 1, rho); + *row = buf; + break; + case 7: + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand_row7(buf + 1, buf, rho); + *row = buf + 1; + break; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of PQCLEAN_DILITHIUM5_AVX2_CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + unsigned int i; + uint8_t seedbuf[3 * SEEDBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl rowbuf[2]; + polyvecl s1, *row = rowbuf; + polyveck s2; + poly t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 3 * SEEDBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = seedbuf + SEEDBYTES; + key = seedbuf + 2 * SEEDBYTES; + + /* Store rho, key */ + memcpy(pk, rho, SEEDBYTES); + memcpy(sk, rho, SEEDBYTES); + memcpy(sk + SEEDBYTES, key, SEEDBYTES); + + /* Sample short vectors s1 and s2 */ + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, 0, 1, 2, 3); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_4x(&s1.vec[4], &s1.vec[5], &s1.vec[6], &s2.vec[0], rhoprime, 4, 5, 6, 7); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_4x(&s2.vec[1], &s2.vec[2], &s2.vec[3], &s2.vec[4], rhoprime, 8, 9, 10, 11); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_eta_4x(&s2.vec[5], &s2.vec[6], &s2.vec[7], &t0, rhoprime, 12, 13, 14, 15); + + /* Pack secret vectors */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM5_AVX2_polyeta_pack(sk + 2 * SEEDBYTES + CRHBYTES + i * POLYETA_PACKEDBYTES, &s1.vec[i]); + } + for (i = 0; i < K; i++) { + PQCLEAN_DILITHIUM5_AVX2_polyeta_pack(sk + 2 * SEEDBYTES + CRHBYTES + (L + i)*POLYETA_PACKEDBYTES, &s2.vec[i]); + } + + /* Transform s1 */ + PQCLEAN_DILITHIUM5_AVX2_polyvecl_ntt(&s1); + + + for (i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, rho, i); + + /* Compute inner-product */ + 
PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_acc_montgomery(&t1, row, &s1); + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&t1); + + /* Add error polynomial */ + PQCLEAN_DILITHIUM5_AVX2_poly_add(&t1, &t1, &s2.vec[i]); + + /* Round t and pack t1, t0 */ + PQCLEAN_DILITHIUM5_AVX2_poly_caddq(&t1); + PQCLEAN_DILITHIUM5_AVX2_poly_power2round(&t1, &t0, &t1); + PQCLEAN_DILITHIUM5_AVX2_polyt1_pack(pk + SEEDBYTES + i * POLYT1_PACKEDBYTES, &t1); + PQCLEAN_DILITHIUM5_AVX2_polyt0_pack(sk + 2 * SEEDBYTES + CRHBYTES + (L + K)*POLYETA_PACKEDBYTES + i * POLYT0_PACKEDBYTES, &t0); + } + + /* Compute CRH(rho, t1) and store in secret key */ + crh(sk + 2 * SEEDBYTES, pk, PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES); + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_crypto_sign_signature +* +* Description: Computes signature. +* +* Arguments: - uint8_t *sig: pointer to output signature (of length PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_signature(uint8_t *sig, size_t *siglen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned int i, n, pos; + uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime; + uint8_t hintbuf[N]; + uint8_t *hint = sig + SEEDBYTES + L * POLYZ_PACKEDBYTES; + uint64_t nonce = 0; + polyvecl mat[K], s1, z; + polyveck t0, s2, w1; + poly c, tmp; + union { + polyvecl y; + polyveck w0; + } tmpv; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + CRHBYTES; + mu = key + SEEDBYTES; + rhoprime = mu + CRHBYTES; + PQCLEAN_DILITHIUM5_AVX2_unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + shake256_inc_init(&state); + 
shake256_inc_absorb(&state, tr, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + crh(rhoprime, key, SEEDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_expand(mat, rho); + PQCLEAN_DILITHIUM5_AVX2_polyvecl_ntt(&s1); + PQCLEAN_DILITHIUM5_AVX2_polyveck_ntt(&s2); + PQCLEAN_DILITHIUM5_AVX2_polyveck_ntt(&t0); + + +rej: + /* Sample intermediate vector y */ + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_4x(&z.vec[0], &z.vec[1], &z.vec[2], &z.vec[3], + rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); + PQCLEAN_DILITHIUM5_AVX2_poly_uniform_gamma1_4x(&z.vec[4], &z.vec[5], &z.vec[6], &tmp, + rhoprime, nonce + 4, nonce + 5, nonce + 6, 0); + nonce += 7; + + /* Matrix-vector product */ + tmpv.y = z; + PQCLEAN_DILITHIUM5_AVX2_polyvecl_ntt(&tmpv.y); + PQCLEAN_DILITHIUM5_AVX2_polyvec_matrix_pointwise_montgomery(&w1, mat, &tmpv.y); + PQCLEAN_DILITHIUM5_AVX2_polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + PQCLEAN_DILITHIUM5_AVX2_polyveck_caddq(&w1); + PQCLEAN_DILITHIUM5_AVX2_polyveck_decompose(&w1, &tmpv.w0, &w1); + PQCLEAN_DILITHIUM5_AVX2_polyveck_pack_w1(sig, &w1); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + PQCLEAN_DILITHIUM5_AVX2_poly_challenge(&c, sig); + PQCLEAN_DILITHIUM5_AVX2_poly_ntt(&c); + + /* Compute z, reject if it reveals secret */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(&tmp, &c, &s1.vec[i]); + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM5_AVX2_poly_add(&z.vec[i], &z.vec[i], &tmp); + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(&z.vec[i]); + if (PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(&z.vec[i], GAMMA1 
- BETA)) { + goto rej; + } + } + + /* Zero hint vector in signature */ + pos = 0; + memset(hint, 0, OMEGA); + + for (i = 0; i < K; i++) { + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(&tmp, &c, &s2.vec[i]); + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM5_AVX2_poly_sub(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(&tmpv.w0.vec[i]); + if (PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(&tmpv.w0.vec[i], GAMMA2 - BETA)) { + goto rej; + } + + /* Compute hints */ + PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(&tmp, &c, &t0.vec[i]); + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&tmp); + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(&tmp); + if (PQCLEAN_DILITHIUM5_AVX2_poly_chknorm(&tmp, GAMMA2)) { + goto rej; + } + + PQCLEAN_DILITHIUM5_AVX2_poly_add(&tmpv.w0.vec[i], &tmpv.w0.vec[i], &tmp); + n = PQCLEAN_DILITHIUM5_AVX2_poly_make_hint(hintbuf, &tmpv.w0.vec[i], &w1.vec[i]); + if (pos + n > OMEGA) { + goto rej; + } + + /* Store hints in signature */ + memcpy(&hint[pos], hintbuf, n); + hint[OMEGA + i] = pos = pos + n; + } + + /* Pack z into signature */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM5_AVX2_polyz_pack(sig + SEEDBYTES + i * POLYZ_PACKEDBYTES, &z.vec[i]); + } + + *siglen = PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_crypto_sign +* +* Description: Compute signed message. 
+* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign(uint8_t *sm, size_t *smlen, const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t i; + + for (i = 0; i < mlen; ++i) { + sm[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + } + PQCLEAN_DILITHIUM5_AVX2_crypto_sign_signature(sm, smlen, sm + PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_crypto_sign_verify +* +* Description: Verifies signature. +* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen, const uint8_t *m, size_t mlen, const uint8_t *pk) { + unsigned int i, j, pos = 0; + /* PQCLEAN_DILITHIUM5_AVX2_polyw1_pack writes additional 14 bytes */ + ALIGNED_UINT8(K * POLYW1_PACKEDBYTES + 14) buf; + uint8_t mu[CRHBYTES]; + const uint8_t *hint = sig + SEEDBYTES + L * POLYZ_PACKEDBYTES; + polyvecl rowbuf[2]; + polyvecl *row = rowbuf; + polyvecl z; + poly c, w1, h; + shake256incctx state; + + if (siglen != PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES) { + return -1; + } + + /* Compute CRH(CRH(rho, t1), msg) */ + crh(mu, pk, 
PQCLEAN_DILITHIUM5_AVX2_CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + /* Expand PQCLEAN_DILITHIUM5_AVX2_challenge */ + PQCLEAN_DILITHIUM5_AVX2_poly_challenge(&c, sig); + PQCLEAN_DILITHIUM5_AVX2_poly_ntt(&c); + + /* Unpack z; shortness follows from unpacking */ + for (i = 0; i < L; i++) { + PQCLEAN_DILITHIUM5_AVX2_polyz_unpack(&z.vec[i], sig + SEEDBYTES + i * POLYZ_PACKEDBYTES); + PQCLEAN_DILITHIUM5_AVX2_poly_ntt(&z.vec[i]); + } + + + for (i = 0; i < K; i++) { + /* Expand matrix row */ + polyvec_matrix_expand_row(&row, rowbuf, pk, i); + + /* Compute i-th row of Az - c2^Dt1 */ + PQCLEAN_DILITHIUM5_AVX2_polyvecl_pointwise_acc_montgomery(&w1, row, &z); + + PQCLEAN_DILITHIUM5_AVX2_polyt1_unpack(&h, pk + SEEDBYTES + i * POLYT1_PACKEDBYTES); + PQCLEAN_DILITHIUM5_AVX2_poly_shiftl(&h); + PQCLEAN_DILITHIUM5_AVX2_poly_ntt(&h); + PQCLEAN_DILITHIUM5_AVX2_poly_pointwise_montgomery(&h, &c, &h); + + PQCLEAN_DILITHIUM5_AVX2_poly_sub(&w1, &w1, &h); + PQCLEAN_DILITHIUM5_AVX2_poly_reduce(&w1); + PQCLEAN_DILITHIUM5_AVX2_poly_invntt_tomont(&w1); + + /* Get hint polynomial and reconstruct w1 */ + memset(h.vec, 0, sizeof(poly)); + if (hint[OMEGA + i] < pos || hint[OMEGA + i] > OMEGA) { + return -1; + } + + for (j = pos; j < hint[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > pos && hint[j] <= hint[j - 1]) { + return -1; + } + h.coeffs[hint[j]] = 1; + } + pos = hint[OMEGA + i]; + + PQCLEAN_DILITHIUM5_AVX2_poly_caddq(&w1); + PQCLEAN_DILITHIUM5_AVX2_poly_use_hint(&w1, &w1, &h); + PQCLEAN_DILITHIUM5_AVX2_polyw1_pack(buf.coeffs + i * POLYW1_PACKEDBYTES, &w1); + } + + /* Extra indices are zero for strong unforgeability */ + for (j = pos; j < OMEGA; ++j) { + if (hint[j]) { + return -1; + } + } + + /* Call random oracle and verify 
PQCLEAN_DILITHIUM5_AVX2_challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf.coeffs, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf.coeffs, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + for (i = 0; i < SEEDBYTES; ++i) { + if (buf.coeffs[i] != sig[i]) { + return -1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_AVX2_crypto_sign_open +* +* Description: Verify signed message. +* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen, const uint8_t *sm, size_t smlen, const uint8_t *pk) { + size_t i; + + if (smlen < PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES) { + goto badsig; + } + + *mlen = smlen - PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES; + if (PQCLEAN_DILITHIUM5_AVX2_crypto_sign_verify(sm, PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES, sm + PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES, *mlen, pk)) { + goto badsig; + } else { + /* All good, copy msg, return 0 */ + for (i = 0; i < *mlen; ++i) { + m[i] = sm[PQCLEAN_DILITHIUM5_AVX2_CRYPTO_BYTES + i]; + } + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = -1; + for (i = 0; i < smlen; ++i) { + m[i] = 0; + } + + return -1; +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/sign.h b/crypto_sign/dilithium/dilithium5/avx2/sign.h new file mode 100644 index 00000000..e1c0ecf4 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/sign.h @@ -0,0 +1,29 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_SIGN_H 
+#define PQCLEAN_DILITHIUM5_AVX2_SIGN_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include +#include + +void PQCLEAN_DILITHIUM5_AVX2_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int PQCLEAN_DILITHIUM5_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/avx2/symmetric-shake.c b/crypto_sign/dilithium/dilithium5/avx2/symmetric-shake.c new file mode 100644 index 00000000..aee9daa5 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/symmetric-shake.c @@ -0,0 +1,26 @@ +#include "fips202.h" +#include "params.h" +#include "symmetric.h" +#include + +void PQCLEAN_DILITHIUM5_AVX2_dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void PQCLEAN_DILITHIUM5_AVX2_dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/crypto_sign/dilithium/dilithium5/avx2/symmetric.h b/crypto_sign/dilithium/dilithium5/avx2/symmetric.h 
new file mode 100644 index 00000000..3d5e8a50 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/avx2/symmetric.h @@ -0,0 +1,36 @@ +#ifndef PQCLEAN_DILITHIUM5_AVX2_SYMMETRIC_H +#define PQCLEAN_DILITHIUM5_AVX2_SYMMETRIC_H +#include "fips202.h" +#include "params.h" +#include + + + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +void PQCLEAN_DILITHIUM5_AVX2_dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +void PQCLEAN_DILITHIUM5_AVX2_dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) +#define stream128_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM5_AVX2_dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE128_RATE), STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM5_AVX2_dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE256_RATE), STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/LICENSE b/crypto_sign/dilithium/dilithium5/clean/LICENSE new file mode 100644 index 00000000..08473af7 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/LICENSE @@ -0,0 +1,5 @@ +Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) + +For Keccak and AES we are using public-domain +code from sources and by authors listed in +comments on top of the respective files. 
diff --git a/crypto_sign/dilithium/dilithium5/clean/Makefile.Microsoft_nmake b/crypto_sign/dilithium/dilithium5/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..017ca211 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/Makefile.Microsoft_nmake @@ -0,0 +1,23 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libdilithium5_clean.lib +OBJECTS=ntt.obj packing.obj poly.obj polyvec.obj reduce.obj rounding.obj sign.obj symmetric-shake.obj + +# Warning C4146 is raised when a unary minus operator is applied to an +# unsigned type; this has nonetheless been standard and portable for as +# long as there has been a C standard, and we need it for constant-time +# computations. Thus, we disable that spurious warning. +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /wd4146 + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/dilithium/dilithium5/clean/api.h b/crypto_sign/dilithium/dilithium5/clean/api.h new file mode 100644 index 00000000..8a8d7901 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/api.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_API_H +#define PQCLEAN_DILITHIUM5_CLEAN_API_H + +#include +#include + +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES 2592 +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES 4880 +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES 4595 +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_ALGNAME "Dilithium5" + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, 
const uint8_t *pk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/ntt.c b/crypto_sign/dilithium/dilithium5/clean/ntt.c new file mode 100644 index 00000000..4f654191 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/ntt.c @@ -0,0 +1,98 @@ +#include "ntt.h" +#include "params.h" +#include "reduce.h" +#include + +static const int32_t zetas[N] = { + 0, 25847, -2608894, -518909, 237124, -777960, -876248, 466468, + 1826347, 2353451, -359251, -2091905, 3119733, -2884855, 3111497, 2680103, + 2725464, 1024112, -1079900, 3585928, -549488, -1119584, 2619752, -2108549, + -2118186, -3859737, -1399561, -3277672, 1757237, -19422, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, -1661693, -3592148, -2537516, 3915439, + -3861115, -3043716, 3574422, -2867647, 3539968, -300467, 2348700, -539299, + -1699267, -1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596, + 811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892, -2797779, + -3930395, -1528703, -3677745, -3041255, -1452451, 3475950, 2176455, -1585221, + -1257611, 1939314, -4083598, -1000202, -3190144, -3157330, -3632928, 126922, + 3412210, -983419, 2147896, 2715295, -2967645, -3693493, -411027, -2477047, + -671102, -1228525, -22981, -1308169, -381987, 1349076, 1852771, -1430430, + -3343383, 264944, 508951, 3097992, 44288, -1100098, 904516, 3958618, + -3724342, -8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856, + 189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589, 1341330, + 1285669, -1584928, -812732, -1439742, -3019102, -3881060, -3628969, 3839961, + 2091667, 3407706, 2316500, 3817976, -3342478, 2244091, -2446433, -3562462, + 266997, 2434439, -1235728, 3513181, -3520352, -3759364, -1197226, -3193378, + 
900702, 1859098, 909542, 819034, 495491, -1613174, -43260, -522500, + -655327, -3122442, 2031748, 3207046, -3556995, -525098, -768622, -3595838, + 342297, 286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044, + 2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353, 1595974, + -3767016, 1250494, 2635921, -3548272, -2994039, 1869119, 1903435, -1050970, + -1333058, 1237275, -3318210, -1430225, -451100, 1312455, 3306115, -1962642, + -1279661, 1917081, -2546312, -1374803, 1500165, 777191, 2235880, 3406031, + -542412, -2831860, -1671176, -1846953, -2584293, -3724270, 594136, -3776993, + -2013608, 2432395, 2454455, -164721, 1957272, 3369112, 185531, -1207385, + -3183426, 162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107, + -3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735, 472078, + -426683, 1723600, -1803090, 1910376, -1667432, -1104333, -260646, -3833893, + -2939036, -2235985, -420899, -2286327, 183443, -976891, 1612842, -3545687, + -554416, 3919660, -48306, -1362209, 3937738, 1400424, -846154, 1976782 +}; + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_ntt +* +* Description: Forward NTT, in-place. No modular reduction is performed after +* additions or subtractions. Output vector is in bitreversed order. 
+* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_ntt(int32_t a[N]) { + unsigned int len, start, j, k; + int32_t zeta, t; + + k = 0; + for (len = 128; len > 0; len >>= 1) { + for (start = 0; start < N; start = j + len) { + zeta = zetas[++k]; + for (j = start; j < start + len; ++j) { + t = PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce((int64_t)zeta * a[j + len]); + a[j + len] = a[j] - t; + a[j] = a[j] + t; + } + } + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_invntt_tomont +* +* Description: Inverse NTT and multiplication by Montgomery factor 2^32. +* In-place. No modular reductions after additions or +* subtractions; input coefficients need to be smaller than +* Q in absolute value. Output coefficient are smaller than Q in +* absolute value. +* +* Arguments: - uint32_t p[N]: input/output coefficient array +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_invntt_tomont(int32_t a[N]) { + unsigned int start, len, j, k; + int32_t t, zeta; + const int32_t f = 41978; // mont^2/256 + + k = 256; + for (len = 1; len < N; len <<= 1) { + for (start = 0; start < N; start = j + len) { + zeta = -zetas[--k]; + for (j = start; j < start + len; ++j) { + t = a[j]; + a[j] = t + a[j + len]; + a[j + len] = t - a[j + len]; + a[j + len] = PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce((int64_t)zeta * a[j + len]); + } + } + } + + for (j = 0; j < N; ++j) { + a[j] = PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce((int64_t)f * a[j]); + } +} diff --git a/crypto_sign/dilithium/dilithium5/clean/ntt.h b/crypto_sign/dilithium/dilithium5/clean/ntt.h new file mode 100644 index 00000000..747b315c --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/ntt.h @@ -0,0 +1,10 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_NTT_H +#define PQCLEAN_DILITHIUM5_CLEAN_NTT_H +#include "params.h" +#include + +void PQCLEAN_DILITHIUM5_CLEAN_ntt(int32_t 
a[N]); + +void PQCLEAN_DILITHIUM5_CLEAN_invntt_tomont(int32_t a[N]); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/packing.c b/crypto_sign/dilithium/dilithium5/clean/packing.c new file mode 100644 index 00000000..34542a2b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/packing.c @@ -0,0 +1,261 @@ +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" + + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_pack_pk +* +* Description: Bit-pack public key pk = (rho, t1). +* +* Arguments: - uint8_t pk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const polyveck *t1: pointer to vector t1 +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES], + const uint8_t rho[SEEDBYTES], + const polyveck *t1) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + pk[i] = rho[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyt1_pack(pk + i * POLYT1_PACKEDBYTES, &t1->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_unpack_pk +* +* Description: Unpack public key pk = (rho, t1).
+* +* Arguments: - uint8_t rho[]: output byte array for rho +* - polyveck *t1: pointer to output vector t1 +* - const uint8_t pk[]: byte array containing bit-packed pk +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_unpack_pk(uint8_t rho[SEEDBYTES], + polyveck *t1, + const uint8_t pk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = pk[i]; + } + pk += SEEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyt1_unpack(&t1->vec[i], pk + i * POLYT1_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_pack_sk +* +* Description: Bit-pack secret key sk. The fields are serialized in the +* order (rho, key, tr, s1, s2, t0), which differs from the +* order of the function parameters. +* +* Arguments: - uint8_t sk[]: output byte array +* - const uint8_t rho[]: byte array containing rho +* - const uint8_t tr[]: byte array containing tr +* - const uint8_t key[]: byte array containing key +* - const polyveck *t0: pointer to vector t0 +* - const polyvecl *s1: pointer to vector s1 +* - const polyveck *s2: pointer to vector s2 +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = rho[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + sk[i] = key[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + sk[i] = tr[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s1->vec[i]); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyeta_pack(sk + i * POLYETA_PACKEDBYTES, &s2->vec[i]); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K;
++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyt0_pack(sk + i * POLYT0_PACKEDBYTES, &t0->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_unpack_sk +* +* Description: Unpack secret key sk. The fields are read in the order +* (rho, key, tr, s1, s2, t0), which differs from the order +* of the function parameters. +* +* Arguments: - uint8_t rho[]: output byte array for rho +* - uint8_t tr[]: output byte array for tr +* - uint8_t key[]: output byte array for key +* - polyveck *t0: pointer to output vector t0 +* - polyvecl *s1: pointer to output vector s1 +* - polyveck *s2: pointer to output vector s2 +* - const uint8_t sk[]: byte array containing bit-packed sk +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES]) { + unsigned int i; + + for (i = 0; i < SEEDBYTES; ++i) { + rho[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < SEEDBYTES; ++i) { + key[i] = sk[i]; + } + sk += SEEDBYTES; + + for (i = 0; i < CRHBYTES; ++i) { + tr[i] = sk[i]; + } + sk += CRHBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyeta_unpack(&s1->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += L * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyeta_unpack(&s2->vec[i], sk + i * POLYETA_PACKEDBYTES); + } + sk += K * POLYETA_PACKEDBYTES; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyt0_unpack(&t0->vec[i], sk + i * POLYT0_PACKEDBYTES); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_pack_sig +* +* Description: Bit-pack signature sig = (c, z, h).
+* +* Arguments: - uint8_t sig[]: output byte array +* - const uint8_t *c: pointer to PQCLEAN_DILITHIUM5_CLEAN_challenge hash length SEEDBYTES +* - const polyvecl *z: pointer to vector z +* - const polyveck *h: pointer to hint vector h +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES], + const uint8_t c[SEEDBYTES], + const polyvecl *z, + const polyveck *h) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + sig[i] = c[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Encode h */ + for (i = 0; i < OMEGA + K; ++i) { + sig[i] = 0; + } + + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + if (h->vec[i].coeffs[j] != 0) { + sig[k++] = (uint8_t) j; + } + } + + sig[OMEGA + i] = (uint8_t) k; + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_unpack_sig +* +* Description: Unpack signature sig = (c, z, h). +* +* Arguments: - uint8_t *c: pointer to output PQCLEAN_DILITHIUM5_CLEAN_challenge hash +* - polyvecl *z: pointer to output vector z +* - polyveck *h: pointer to output hint vector h +* - const uint8_t sig[]: byte array containing +* bit-packed signature +* +* Returns 1 in case of malformed signature; otherwise 0. 
+**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_unpack_sig(uint8_t c[SEEDBYTES], + polyvecl *z, + polyveck *h, + const uint8_t sig[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES]) { + unsigned int i, j, k; + + for (i = 0; i < SEEDBYTES; ++i) { + c[i] = sig[i]; + } + sig += SEEDBYTES; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h */ + k = 0; + for (i = 0; i < K; ++i) { + for (j = 0; j < N; ++j) { + h->vec[i].coeffs[j] = 0; + } + + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { + return 1; + } + + for (j = k; j < sig[OMEGA + i]; ++j) { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) { + return 1; + } + h->vec[i].coeffs[sig[j]] = 1; + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (j = k; j < OMEGA; ++j) { + if (sig[j]) { + return 1; + } + } + + return 0; +} diff --git a/crypto_sign/dilithium/dilithium5/clean/packing.h b/crypto_sign/dilithium/dilithium5/clean/packing.h new file mode 100644 index 00000000..f1fa637a --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/packing.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_PACKING_H +#define PQCLEAN_DILITHIUM5_CLEAN_PACKING_H +#include "params.h" +#include "polyvec.h" +#include + +void PQCLEAN_DILITHIUM5_CLEAN_pack_pk(uint8_t pk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[SEEDBYTES], const polyveck *t1); + +void PQCLEAN_DILITHIUM5_CLEAN_pack_sk(uint8_t sk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES], + const uint8_t rho[SEEDBYTES], + const uint8_t tr[CRHBYTES], + const uint8_t key[SEEDBYTES], + const polyveck *t0, + const polyvecl *s1, + const polyveck *s2); + +void PQCLEAN_DILITHIUM5_CLEAN_pack_sig(uint8_t sig[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES], const uint8_t c[SEEDBYTES], const polyvecl *z, const polyveck *h); + +void 
PQCLEAN_DILITHIUM5_CLEAN_unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES]); + +void PQCLEAN_DILITHIUM5_CLEAN_unpack_sk(uint8_t rho[SEEDBYTES], + uint8_t tr[CRHBYTES], + uint8_t key[SEEDBYTES], + polyveck *t0, + polyvecl *s1, + polyveck *s2, + const uint8_t sk[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES]); + +int PQCLEAN_DILITHIUM5_CLEAN_unpack_sig(uint8_t c[SEEDBYTES], polyvecl *z, polyveck *h, const uint8_t sig[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES]); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/params.h b/crypto_sign/dilithium/dilithium5/clean/params.h new file mode 100644 index 00000000..f7604a1d --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/params.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_PARAMS_H +#define PQCLEAN_DILITHIUM5_CLEAN_PARAMS_H + + + +#define SEEDBYTES 32 +#define CRHBYTES 48 +#define N 256 +#define Q 8380417 +#define D 13 +#define ROOT_OF_UNITY 1753 + +#define K 8 +#define L 7 +#define ETA 2 +#define TAU 60 +#define BETA 120 +#define GAMMA1 (1 << 19) +#define GAMMA2 ((Q-1)/32) +#define OMEGA 75 +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_ALGNAME "Dilithium5" + + +#define POLYT1_PACKEDBYTES 320 +#define POLYT0_PACKEDBYTES 416 +#define POLYVECH_PACKEDBYTES (OMEGA + K) + +#define POLYZ_PACKEDBYTES 640 + +#define POLYW1_PACKEDBYTES 128 + +#define POLYETA_PACKEDBYTES 96 + +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLYT1_PACKEDBYTES) +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + CRHBYTES \ + + L*POLYETA_PACKEDBYTES \ + + K*POLYETA_PACKEDBYTES \ + + K*POLYT0_PACKEDBYTES) +#define PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES (SEEDBYTES + L*POLYZ_PACKEDBYTES + POLYVECH_PACKEDBYTES) + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/poly.c b/crypto_sign/dilithium/dilithium5/clean/poly.c new file mode 100644 index 00000000..ff12495b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/poly.c 
@@ -0,0 +1,842 @@ +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" +#include "rounding.h" +#include "symmetric.h" +#include + +#define DBENCH_START() +#define DBENCH_STOP(t) + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_reduce +* +* Description: Inplace reduction of all coefficients of polynomial to +* representative in [-6283009,6283007]. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_reduce(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_reduce32(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_caddq +* +* Description: For all coefficients of in/out polynomial add Q if +* coefficient is negative. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_caddq(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_caddq(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_freeze +* +* Description: Inplace reduction of all coefficients of polynomial to +* standard representatives. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_freeze(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_freeze(a->coeffs[i]); + } + + DBENCH_STOP(*tred); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_add +* +* Description: Add polynomials. No modular reduction is performed. 
+* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first summand +* - const poly *b: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_sub +* +* Description: Subtract polynomials. No modular reduction is +* performed. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial to be +* subtracted from first input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + } + + DBENCH_STOP(*tadd); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_shiftl +* +* Description: Multiply polynomial by 2^D without modular reduction. Assumes +* input coefficients to be less than 2^{31-D} in absolute value. +* NOTE(review): implemented with `<<=`; if coeffs is a signed +* type (presumably int32_t -- confirm in poly.h), shifting a +* negative value is undefined behaviour in ISO C. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_shiftl(poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a->coeffs[i] <<= D; + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_ntt +* +* Description: Inplace forward NTT. Coefficients can grow by +* 8*Q in absolute value.
+* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_ntt(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_CLEAN_ntt(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_invntt_tomont +* +* Description: Inplace inverse NTT and multiplication by 2^{32}. +* Input coefficients need to be less than Q in absolute +* value and output coefficients are again bounded by Q. +* +* Arguments: - poly *a: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_invntt_tomont(poly *a) { + DBENCH_START(); + + PQCLEAN_DILITHIUM5_CLEAN_invntt_tomont(a->coeffs); + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery +* +* Description: Pointwise multiplication of polynomials in NTT domain +* representation and multiplication of resulting polynomial +* by 2^{-32}. +* +* Arguments: - poly *c: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + c->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); + } + + DBENCH_STOP(*tmul); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_power2round +* +* Description: For all coefficients c of the input polynomial, +* compute c0, c1 such that c mod Q = c1*2^D + c0 +* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be +* standard representatives. 
+* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a1->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_power2round(&a0->coeffs[i], a->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_decompose +* +* Description: For all coefficients c of the input polynomial, +* compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0 +* with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we +* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. +* Assumes coefficients to be standard representatives. +* +* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 +* - poly *a0: pointer to output polynomial with coefficients c0 +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + a1->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_decompose(&a0->coeffs[i], a->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_make_hint +* +* Description: Compute hint polynomial. The coefficients of which indicate +* whether the low bits of the corresponding coefficient of +* the input polynomial overflow into the high bits. +* +* Arguments: - poly *h: pointer to output hint polynomial +* - const poly *a0: pointer to low part of input polynomial +* - const poly *a1: pointer to high part of input polynomial +* +* Returns number of 1 bits. 
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM5_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1) { + unsigned int i, s = 0; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + h->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_make_hint(a0->coeffs[i], a1->coeffs[i]); + s += h->coeffs[i]; + } + + DBENCH_STOP(*tround); + return s; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_use_hint +* +* Description: Use hint polynomial to correct the high bits of a polynomial. +* +* Arguments: - poly *b: pointer to output polynomial with corrected high bits +* - const poly *a: pointer to input polynomial +* - const poly *h: pointer to input hint polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_poly_use_hint(poly *b, const poly *a, const poly *h) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N; ++i) { + b->coeffs[i] = PQCLEAN_DILITHIUM5_CLEAN_use_hint(a->coeffs[i], h->coeffs[i]); + } + + DBENCH_STOP(*tround); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_chknorm +* +* Description: Check infinity norm of polynomial against given bound. +* Assumes input coefficients were reduced by PQCLEAN_DILITHIUM5_CLEAN_reduce32(). +* +* Arguments: - const poly *a: pointer to polynomial +* - int32_t B: norm bound +* +* Returns 0 if norm is strictly smaller than B <= (Q-1)/8 and 1 otherwise. +**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_poly_chknorm(const poly *a, int32_t B) { + unsigned int i; + int32_t t; + DBENCH_START(); + + if (B > (Q - 1) / 8) { + return 1; + } + + /* It is ok to leak which coefficient violates the bound since + the probability for each coefficient is independent of secret + data but we must not leak the sign of the centralized representative. 
*/ + for (i = 0; i < N; ++i) { + /* Branch-free absolute value: t becomes an all-ones mask when the coefficient is negative (assumes >> on a negative value is an arithmetic shift), so 2*coeff is subtracted only in that case */ + t = a->coeffs[i] >> 31; + t = a->coeffs[i] - (t & 2 * a->coeffs[i]); + + if (t >= B) { + DBENCH_STOP(*tsample); + return 1; + } + } + + DBENCH_STOP(*tsample); + return 0; +} + +/************************************************* +* Name: rej_uniform +* +* Description: Sample uniformly random coefficients in [0, Q-1] by +* performing rejection sampling on array of random bytes. +* Each candidate is assembled from 3 consecutive bytes +* (little-endian) and masked to 23 bits; candidates >= Q +* are discarded. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. +**************************************************/ +static unsigned int rej_uniform(int32_t *a, + unsigned int len, + const uint8_t *buf, + unsigned int buflen) { + unsigned int ctr, pos; + uint32_t t; + DBENCH_START(); + + ctr = pos = 0; + while (ctr < len && pos + 3 <= buflen) { + t = buf[pos++]; + t |= (uint32_t)buf[pos++] << 8; + t |= (uint32_t)buf[pos++] << 16; + t &= 0x7FFFFF; + + if (t < Q) { + a[ctr++] = t; + } + } + + DBENCH_STOP(*tsample); + return ctr; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_poly_uniform +* +* Description: Sample polynomial with uniformly random coefficients +* in [0,Q-1] by performing rejection sampling on the +* output stream set up by stream128_init(seed, nonce) +* (NOTE(review): presumably SHAKE128 -- confirm in symmetric.h).
+* +* Arguments: - poly *a: pointer to output polynomial +* - const uint8_t seed[]: byte array with seed of length SEEDBYTES +* - uint16_t nonce: 2-byte nonce +**************************************************/ +#define POLY_UNIFORM_NBLOCKS ((768 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES) +void PQCLEAN_DILITHIUM5_CLEAN_poly_uniform(poly *a, + const uint8_t seed[SEEDBYTES], + uint16_t nonce) { + unsigned int i, ctr, off; + unsigned int buflen = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES; + uint8_t buf[POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES + 2]; + stream128_state state; + + stream128_init(&state, seed, nonce); + stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state); + + ctr = rej_uniform(a->coeffs, N, buf, buflen); + + while (ctr < N) { + off = buflen % 3; + for (i = 0; i < off; ++i) { + buf[i] = buf[buflen - off + i]; + } + + stream128_squeezeblocks(buf + off, 1, &state); + buflen = STREAM128_BLOCKBYTES + off; + ctr += rej_uniform(a->coeffs + ctr, N - ctr, buf, buflen); + } + stream128_release(&state); +} + +/************************************************* +* Name: rej_eta +* +* Description: Sample uniformly random coefficients in [-ETA, ETA] by +* performing rejection sampling on array of random bytes. +* +* Arguments: - int32_t *a: pointer to output array (allocated) +* - unsigned int len: number of coefficients to be sampled +* - const uint8_t *buf: array of random bytes +* - unsigned int buflen: length of array of random bytes +* +* Returns number of sampled coefficients. Can be smaller than len if not enough +* random bytes were given. 
+**************************************************/
+static unsigned int rej_eta(int32_t *a,
+                            unsigned int len,
+                            const uint8_t *buf,
+                            unsigned int buflen) {
+    unsigned int ctr, pos;
+    uint32_t t0, t1;
+    DBENCH_START();
+
+    ctr = pos = 0;
+    while (ctr < len && pos < buflen) {
+        t0 = buf[pos] & 0x0F; /* low nibble of the current byte */
+        t1 = buf[pos++] >> 4; /* high nibble; pos then moves on to the next byte */
+
+        if (t0 < 15) { /* a nibble equal to 15 is rejected */
+            t0 = t0 - (205 * t0 >> 10) * 5; /* t0 mod 5: (205 * t0) >> 10 equals t0 / 5 for t0 < 15 */
+            a[ctr++] = 2 - t0; /* 0..4 becomes 2..-2 */
+        }
+        if (t1 < 15 && ctr < len) { /* ctr is re-checked: the low nibble may have filled the last slot */
+            t1 = t1 - (205 * t1 >> 10) * 5; /* t1 mod 5, same trick as for t0 */
+            a[ctr++] = 2 - t1;
+        }
+    }
+
+    DBENCH_STOP(*tsample);
+    return ctr;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_eta
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-ETA,ETA] by performing rejection sampling on the
+*              output stream from SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length SEEDBYTES
+*              - uint16_t nonce: 2-byte nonce
+**************************************************/
+#define POLY_UNIFORM_ETA_NBLOCKS ((136 + STREAM128_BLOCKBYTES - 1)/STREAM128_BLOCKBYTES)
+void PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_eta(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce) {
+    unsigned int ctr;
+    unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS * STREAM128_BLOCKBYTES;
+    uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS * STREAM128_BLOCKBYTES];
+    stream128_state state;
+
+    stream128_init(&state, seed, nonce);
+    stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
+
+    ctr = rej_eta(a->coeffs, N, buf, buflen);
+
+    while (ctr < N) { /* fewer than N coefficients accepted so far */
+        stream128_squeezeblocks(buf, 1, &state); /* refill with exactly one more block */
+        ctr += rej_eta(a->coeffs + ctr, N - ctr, buf, STREAM128_BLOCKBYTES);
+    }
+    stream128_release(&state);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_gamma1
+*
+* Description: Sample polynomial with uniformly random coefficients
+*              in [-(GAMMA1 - 1), GAMMA1] by unpacking output stream
+*              of
SHAKE256(seed|nonce) or AES256CTR(seed,nonce).
+*
+* Arguments:   - poly *a: pointer to output polynomial
+*              - const uint8_t seed[]: byte array with seed of length CRHBYTES
+*              - uint16_t nonce: 16-bit nonce
+**************************************************/
+#define POLY_UNIFORM_GAMMA1_NBLOCKS ((POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1)/STREAM256_BLOCKBYTES) /* ceil(POLYZ_PACKEDBYTES / STREAM256_BLOCKBYTES) */
+void PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_gamma1(poly *a,
+        const uint8_t seed[CRHBYTES],
+        uint16_t nonce) {
+    uint8_t buf[POLY_UNIFORM_GAMMA1_NBLOCKS * STREAM256_BLOCKBYTES];
+    stream256_state state;
+
+    stream256_init(&state, seed, nonce);
+    stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1_NBLOCKS, &state);
+    stream256_release(&state); /* state is released before unpacking; only buf is needed from here on */
+    PQCLEAN_DILITHIUM5_CLEAN_polyz_unpack(a, buf);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_poly_challenge
+*
+* Description: Implementation of H. Samples polynomial with TAU nonzero
+*              coefficients in {-1,1} using the output stream of
+*              SHAKE256(seed).
+*
+* Arguments:   - poly *c: pointer to output polynomial
+*              - const uint8_t seed[]: byte array containing seed of length SEEDBYTES
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]) {
+    unsigned int i, b, pos;
+    uint64_t signs;
+    uint8_t buf[SHAKE256_RATE];
+    shake256incctx state;
+
+    shake256_inc_init(&state);
+    shake256_inc_absorb(&state, seed, SEEDBYTES);
+    shake256_inc_finalize(&state);
+    shake256_inc_squeeze(buf, sizeof buf, &state);
+
+    signs = 0;
+    for (i = 0; i < 8; ++i) {
+        signs |= (uint64_t)buf[i] << 8 * i; /* first 8 output bytes, little-endian, supply the sign bits */
+    }
+    pos = 8; /* index bytes are read after the 8 sign bytes */
+
+    for (i = 0; i < N; ++i) {
+        c->coeffs[i] = 0;
+    }
+    for (i = N - TAU; i < N; ++i) {
+        do {
+            if (pos >= SHAKE256_RATE) { /* buffer used up: squeeze sizeof buf fresh bytes */
+                shake256_inc_squeeze(buf, sizeof buf, &state);
+                pos = 0;
+            }
+
+            b = buf[pos++];
+        } while (b > i); /* reject bytes above i so that b is an index in [0, i] */
+
+        c->coeffs[i] = c->coeffs[b]; /* move whatever sits at index b to slot i ... */
+        c->coeffs[b] = 1 - 2 * (signs & 1); /* ... and store +1 (sign bit 0) or -1 (sign bit 1) at b */
+        signs >>= 1;
+    }
+    shake256_inc_ctx_release(&state);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyeta_pack
+*
+* Description: Bit-pack polynomial with coefficients in [-ETA,ETA].
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYETA_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyeta_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint8_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = (uint8_t) (ETA - a->coeffs[8 * i + 0]); + t[1] = (uint8_t) (ETA - a->coeffs[8 * i + 1]); + t[2] = (uint8_t) (ETA - a->coeffs[8 * i + 2]); + t[3] = (uint8_t) (ETA - a->coeffs[8 * i + 3]); + t[4] = (uint8_t) (ETA - a->coeffs[8 * i + 4]); + t[5] = (uint8_t) (ETA - a->coeffs[8 * i + 5]); + t[6] = (uint8_t) (ETA - a->coeffs[8 * i + 6]); + t[7] = (uint8_t) (ETA - a->coeffs[8 * i + 7]); + + r[3 * i + 0] = (t[0] >> 0) | (t[1] << 3) | (t[2] << 6); + r[3 * i + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); + r[3 * i + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyeta_unpack +* +* Description: Unpack polynomial with coefficients in [-ETA,ETA]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyeta_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + r->coeffs[8 * i + 0] = (a[3 * i + 0] >> 0) & 7; + r->coeffs[8 * i + 1] = (a[3 * i + 0] >> 3) & 7; + r->coeffs[8 * i + 2] = ((a[3 * i + 0] >> 6) | (a[3 * i + 1] << 2)) & 7; + r->coeffs[8 * i + 3] = (a[3 * i + 1] >> 1) & 7; + r->coeffs[8 * i + 4] = (a[3 * i + 1] >> 4) & 7; + r->coeffs[8 * i + 5] = ((a[3 * i + 1] >> 7) | (a[3 * i + 2] << 1)) & 7; + r->coeffs[8 * i + 6] = (a[3 * i + 2] >> 2) & 7; + r->coeffs[8 * i + 7] = (a[3 * i + 2] >> 5) & 7; + + r->coeffs[8 * i + 0] = ETA - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = ETA - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = ETA - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = ETA - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = ETA - r->coeffs[8 * i + 4]; + r->coeffs[8 * i + 5] = ETA - r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = ETA - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = ETA - r->coeffs[8 * i + 7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyt1_pack +* +* Description: Bit-pack polynomial t1 with coefficients fitting in 10 bits. +* Input coefficients are assumed to be standard representatives. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT1_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyt1_pack(uint8_t *r, const poly *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r[5 * i + 0] = (uint8_t) (a->coeffs[4 * i + 0] >> 0); + r[5 * i + 1] = (uint8_t) ((a->coeffs[4 * i + 0] >> 8) | (a->coeffs[4 * i + 1] << 2)); + r[5 * i + 2] = (uint8_t) ((a->coeffs[4 * i + 1] >> 6) | (a->coeffs[4 * i + 2] << 4)); + r[5 * i + 3] = (uint8_t) ((a->coeffs[4 * i + 2] >> 4) | (a->coeffs[4 * i + 3] << 6)); + r[5 * i + 4] = (uint8_t) (a->coeffs[4 * i + 3] >> 2); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyt1_unpack +* +* Description: Unpack polynomial t1 with 10-bit coefficients. +* Output coefficients are standard representatives. +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyt1_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 4; ++i) { + r->coeffs[4 * i + 0] = ((a[5 * i + 0] >> 0) | ((uint32_t)a[5 * i + 1] << 8)) & 0x3FF; + r->coeffs[4 * i + 1] = ((a[5 * i + 1] >> 2) | ((uint32_t)a[5 * i + 2] << 6)) & 0x3FF; + r->coeffs[4 * i + 2] = ((a[5 * i + 2] >> 4) | ((uint32_t)a[5 * i + 3] << 4)) & 0x3FF; + r->coeffs[4 * i + 3] = ((a[5 * i + 3] >> 6) | ((uint32_t)a[5 * i + 4] << 2)) & 0x3FF; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyt0_pack +* +* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYT0_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyt0_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint32_t t[8]; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + t[0] = (1 << (D - 1)) - a->coeffs[8 * i + 0]; + t[1] = (1 << (D - 1)) - a->coeffs[8 * i + 1]; + t[2] = (1 << (D - 1)) - a->coeffs[8 * i + 2]; + t[3] = (1 << (D - 1)) - a->coeffs[8 * i + 3]; + t[4] = (1 << (D - 1)) - a->coeffs[8 * i + 4]; + t[5] = (1 << (D - 1)) - a->coeffs[8 * i + 5]; + t[6] = (1 << (D - 1)) - a->coeffs[8 * i + 6]; + t[7] = (1 << (D - 1)) - a->coeffs[8 * i + 7]; + + r[13 * i + 0] = (uint8_t) t[0]; + r[13 * i + 1] = (uint8_t) (t[0] >> 8); + r[13 * i + 1] |= (uint8_t) (t[1] << 5); + r[13 * i + 2] = (uint8_t) (t[1] >> 3); + r[13 * i + 3] = (uint8_t) (t[1] >> 11); + r[13 * i + 3] |= (uint8_t) (t[2] << 2); + r[13 * i + 4] = (uint8_t) (t[2] >> 6); + r[13 * i + 4] |= (uint8_t) (t[3] << 7); + r[13 * i + 5] = (uint8_t) (t[3] >> 1); + r[13 * i + 6] = (uint8_t) (t[3] >> 9); + r[13 * i + 6] |= (uint8_t) (t[4] << 4); + r[13 * i + 7] = (uint8_t) (t[4] >> 4); + r[13 * i + 8] = (uint8_t) (t[4] >> 12); + r[13 * i + 8] |= (uint8_t) (t[5] << 1); + r[13 * i + 9] = (uint8_t) (t[5] >> 7); + r[13 * i + 9] |= (uint8_t) (t[6] << 6); + r[13 * i + 10] = (uint8_t) (t[6] >> 2); + r[13 * i + 11] = (uint8_t) (t[6] >> 10); + r[13 * i + 11] |= (uint8_t) (t[7] << 3); + r[13 * i + 12] = (uint8_t) (t[7] >> 5); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyt0_unpack +* +* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. 
+* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: byte array with bit-packed polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyt0_unpack(poly *r, const uint8_t *a) { + unsigned int i; + DBENCH_START(); + + for (i = 0; i < N / 8; ++i) { + r->coeffs[8 * i + 0] = a[13 * i + 0]; + r->coeffs[8 * i + 0] |= (uint32_t)a[13 * i + 1] << 8; + r->coeffs[8 * i + 0] &= 0x1FFF; + + r->coeffs[8 * i + 1] = a[13 * i + 1] >> 5; + r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 2] << 3; + r->coeffs[8 * i + 1] |= (uint32_t)a[13 * i + 3] << 11; + r->coeffs[8 * i + 1] &= 0x1FFF; + + r->coeffs[8 * i + 2] = a[13 * i + 3] >> 2; + r->coeffs[8 * i + 2] |= (uint32_t)a[13 * i + 4] << 6; + r->coeffs[8 * i + 2] &= 0x1FFF; + + r->coeffs[8 * i + 3] = a[13 * i + 4] >> 7; + r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 5] << 1; + r->coeffs[8 * i + 3] |= (uint32_t)a[13 * i + 6] << 9; + r->coeffs[8 * i + 3] &= 0x1FFF; + + r->coeffs[8 * i + 4] = a[13 * i + 6] >> 4; + r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 7] << 4; + r->coeffs[8 * i + 4] |= (uint32_t)a[13 * i + 8] << 12; + r->coeffs[8 * i + 4] &= 0x1FFF; + + r->coeffs[8 * i + 5] = a[13 * i + 8] >> 1; + r->coeffs[8 * i + 5] |= (uint32_t)a[13 * i + 9] << 7; + r->coeffs[8 * i + 5] &= 0x1FFF; + + r->coeffs[8 * i + 6] = a[13 * i + 9] >> 6; + r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 10] << 2; + r->coeffs[8 * i + 6] |= (uint32_t)a[13 * i + 11] << 10; + r->coeffs[8 * i + 6] &= 0x1FFF; + + r->coeffs[8 * i + 7] = a[13 * i + 11] >> 3; + r->coeffs[8 * i + 7] |= (uint32_t)a[13 * i + 12] << 5; + r->coeffs[8 * i + 7] &= 0x1FFF; + + r->coeffs[8 * i + 0] = (1 << (D - 1)) - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = (1 << (D - 1)) - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = (1 << (D - 1)) - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = (1 << (D - 1)) - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = (1 << (D - 1)) - r->coeffs[8 * i + 4]; + r->coeffs[8 * i + 5] = (1 << (D - 1)) - 
r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = (1 << (D - 1)) - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = (1 << (D - 1)) - r->coeffs[8 * i + 7]; + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyz_pack +* +* Description: Bit-pack polynomial with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. +* +* Arguments: - uint8_t *r: pointer to output byte array with at least +* POLYZ_PACKEDBYTES bytes +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyz_pack(uint8_t *r, const poly *a) { + unsigned int i; + uint32_t t[4]; + DBENCH_START(); + + for (i = 0; i < N / 2; ++i) { + t[0] = GAMMA1 - a->coeffs[2 * i + 0]; + t[1] = GAMMA1 - a->coeffs[2 * i + 1]; + + r[5 * i + 0] = (uint8_t) t[0]; + r[5 * i + 1] = (uint8_t) (t[0] >> 8); + r[5 * i + 2] = (uint8_t) (t[0] >> 16); + r[5 * i + 2] |= (uint8_t) (t[1] << 4); + r[5 * i + 3] = (uint8_t) (t[1] >> 4); + r[5 * i + 4] = (uint8_t) (t[1] >> 12); + } + + DBENCH_STOP(*tpack); +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyz_unpack +* +* Description: Unpack polynomial z with coefficients +* in [-(GAMMA1 - 1), GAMMA1]. 
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: byte array with bit-packed polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_polyz_unpack(poly *r, const uint8_t *a) {
+    unsigned int i;
+    DBENCH_START();
+
+    for (i = 0; i < N / 2; ++i) { /* every 5 input bytes carry two 20-bit values */
+        r->coeffs[2 * i + 0]  = a[5 * i + 0];
+        r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 1] << 8;
+        r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 2] << 16;
+        r->coeffs[2 * i + 0] &= 0xFFFFF; /* drop the high nibble of byte 2, which belongs to the odd coefficient */
+
+        r->coeffs[2 * i + 1]  = a[5 * i + 2] >> 4;
+        r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 3] << 4;
+        r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 4] << 12;
+        r->coeffs[2 * i + 1] &= 0xFFFFF; /* keep 20 bits; the 4 + 8 + 8 bits assembled above already fit */
+
+        r->coeffs[2 * i + 0] = GAMMA1 - r->coeffs[2 * i + 0]; /* inverse of the t = GAMMA1 - coeff mapping */
+        r->coeffs[2 * i + 1] = GAMMA1 - r->coeffs[2 * i + 1];
+    }
+
+    DBENCH_STOP(*tpack);
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyw1_pack
+*
+* Description: Bit-pack polynomial w1 with coefficients in [0,15] or [0,43].
+*              Input coefficients are assumed to be standard representatives.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array with at least
+*                            POLYW1_PACKEDBYTES bytes
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_polyw1_pack(uint8_t *r, const poly *a) {
+    unsigned int i;
+    DBENCH_START();
+
+    for (i = 0; i < N / 2; ++i) {
+        r[i] = (uint8_t) (a->coeffs[2 * i + 0] | (a->coeffs[2 * i + 1] << 4)); /* even coefficient in the low nibble, odd one in the high nibble */
+    }
+
+    DBENCH_STOP(*tpack);
+}
diff --git a/crypto_sign/dilithium/dilithium5/clean/poly.h b/crypto_sign/dilithium/dilithium5/clean/poly.h
new file mode 100644
index 00000000..f768f63e
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium5/clean/poly.h
@@ -0,0 +1,53 @@
+#ifndef PQCLEAN_DILITHIUM5_CLEAN_POLY_H
+#define PQCLEAN_DILITHIUM5_CLEAN_POLY_H
+#include "params.h"
+#include <stdint.h>
+
+typedef struct {
+    int32_t coeffs[N]; /* N signed 32-bit coefficients */
+} poly;
+
+void PQCLEAN_DILITHIUM5_CLEAN_poly_reduce(poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_caddq(poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_freeze(poly *a);
+
+void PQCLEAN_DILITHIUM5_CLEAN_poly_add(poly *c, const poly *a, const poly *b);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_sub(poly *c, const poly *a, const poly *b);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_shiftl(poly *a);
+
+void PQCLEAN_DILITHIUM5_CLEAN_poly_ntt(poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_invntt_tomont(poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery(poly *c, const poly *a, const poly *b);
+
+void PQCLEAN_DILITHIUM5_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a);
+unsigned int PQCLEAN_DILITHIUM5_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_use_hint(poly *b, const poly *a, const poly *h);
+
+int PQCLEAN_DILITHIUM5_CLEAN_poly_chknorm(const poly *a, int32_t B);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_uniform(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_eta(poly *a,
+        const uint8_t seed[SEEDBYTES],
+        uint16_t nonce);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_gamma1(poly *a,
+        const uint8_t seed[CRHBYTES],
+        uint16_t nonce);
+void PQCLEAN_DILITHIUM5_CLEAN_poly_challenge(poly *c, const uint8_t seed[SEEDBYTES]);
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyeta_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_polyeta_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyt1_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_polyt1_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyt0_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_polyt0_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyz_pack(uint8_t *r, const poly *a);
+void PQCLEAN_DILITHIUM5_CLEAN_polyz_unpack(poly *r, const uint8_t *a);
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyw1_pack(uint8_t *r, const poly *a);
+
+#endif
diff --git a/crypto_sign/dilithium/dilithium5/clean/polyvec.c b/crypto_sign/dilithium/dilithium5/clean/polyvec.c
new file mode 100644
index 00000000..e996d1af
--- /dev/null
+++ b/crypto_sign/dilithium/dilithium5/clean/polyvec.c
@@ -0,0 +1,448 @@
+#include "params.h"
+#include "poly.h"
+#include "polyvec.h"
+#include <stdint.h>
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_expand
+*
+* Description: Implementation of ExpandA. Generates matrix A with uniformly
+*              random coefficients a_{i,j} by performing rejection
+*              sampling on the output stream of SHAKE128(rho|j|i)
+*              or AES256CTR(rho,j|i).
+* +* Arguments: - polyvecl mat[K]: output matrix +* - const uint8_t rho[]: byte array containing seed rho +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { + unsigned int i, j; + + for (i = 0; i < K; ++i) { + for (j = 0; j < L; ++j) { + PQCLEAN_DILITHIUM5_CLEAN_poly_uniform(&mat[i].vec[j], rho, (uint16_t) ((i << 8) + j)); + } + } +} + +void PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat[i], v); + } +} + +/**************************************************************/ +/************ Vectors of polynomials of length L **************/ +/**************************************************************/ + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_eta(&v->vec[i], seed, nonce++); + } +} + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_gamma1(&v->vec[i], seed, (uint16_t) (L * nonce + i)); + } +} + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_reduce(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_reduce(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyvecl_freeze +* +* Description: Reduce coefficients of polynomials in vector of length L +* to standard representatives. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_freeze(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_freeze(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyvecl_add +* +* Description: Add vectors of polynomials of length L. +* No modular reduction is performed. +* +* Arguments: - polyvecl *w: pointer to output vector +* - const polyvecl *u: pointer to first summand +* - const polyvecl *v: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt +* +* Description: Forward NTT of all polynomials in vector of length L. Output +* coefficients can be up to 16*Q larger than input coefficients. 
+* +* Arguments: - polyvecl *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_ntt(&v->vec[i]); + } +} + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_invntt_tomont(polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_invntt_tomont(&v->vec[i]); + } +} + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v) { + unsigned int i; + + for (i = 0; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_acc_montgomery +* +* Description: Pointwise multiply vectors of polynomials of length L, multiply +* resulting vector by 2^{-32} and add (accumulate) polynomials +* in it. Input/output vectors are in NTT domain representation. +* +* Arguments: - poly *w: output polynomial +* - const polyvecl *u: pointer to first input vector +* - const polyvecl *v: pointer to second input vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v) { + unsigned int i; + poly t; + + PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery(w, &u->vec[0], &v->vec[0]); + for (i = 1; i < L; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery(&t, &u->vec[i], &v->vec[i]); + PQCLEAN_DILITHIUM5_CLEAN_poly_add(w, w, &t); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyvecl_chknorm +* +* Description: Check infinity norm of polynomials in vector of length L. +* Assumes input polyvecl to be reduced by PQCLEAN_DILITHIUM5_CLEAN_polyvecl_reduce(). 
+*
+* Arguments:   - const polyvecl *v: pointer to vector
+*              - int32_t bound: norm bound
+*
+* Returns 0 if norm of all polynomials is strictly smaller than bound <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM5_CLEAN_polyvecl_chknorm(const polyvecl *v, int32_t bound) {
+    unsigned int i;
+
+    for (i = 0; i < L; ++i) {
+        if (PQCLEAN_DILITHIUM5_CLEAN_poly_chknorm(&v->vec[i], bound)) {
+            return 1; /* stop at the first polynomial that fails the check */
+        }
+    }
+
+    return 0;
+}
+
+/**************************************************************/
+/************ Vectors of polynomials of length K **************/
+/**************************************************************/
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM5_CLEAN_poly_uniform_eta(&v->vec[i], seed, nonce++); /* same seed, consecutive nonces: one per polynomial */
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce
+*
+* Description: Reduce coefficients of polynomials in vector of length K
+*              to representatives in [-6283009,6283007].
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(polyveck *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM5_CLEAN_poly_reduce(&v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyveck_caddq
+*
+* Description: For all coefficients of polynomials in vector of length K
+*              add Q if coefficient is negative.
+* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_caddq(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_caddq(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_freeze +* +* Description: Reduce coefficients of polynomials in vector of length K +* to standard representatives. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_freeze(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_freeze(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_add +* +* Description: Add vectors of polynomials of length K. +* No modular reduction is performed. +* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first summand +* - const polyveck *v: pointer to second summand +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_sub +* +* Description: Subtract vectors of polynomials of length K. +* No modular reduction is performed. 
+* +* Arguments: - polyveck *w: pointer to output vector +* - const polyveck *u: pointer to first input vector +* - const polyveck *v: pointer to second input vector to be +* subtracted from first input vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_shiftl +* +* Description: Multiply vector of polynomials of Length K by 2^D without modular +* reduction. Assumes input coefficients to be less than 2^{31-D}. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_shiftl(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_shiftl(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_ntt +* +* Description: Forward NTT of all polynomials in vector of length K. Output +* coefficients can be up to 16*Q larger than input coefficients. +* +* Arguments: - polyveck *v: pointer to input/output vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_ntt(polyveck *v) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_ntt(&v->vec[i]); + } +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont +* +* Description: Inverse NTT and multiplication by 2^{32} of polynomials +* in vector of length K. Input coefficients need to be less +* than 2*Q. 
+*
+* Arguments:   - polyveck *v: pointer to input/output vector
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(polyveck *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM5_CLEAN_poly_invntt_tomont(&v->vec[i]);
+    }
+}
+
+void PQCLEAN_DILITHIUM5_CLEAN_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM5_CLEAN_poly_pointwise_montgomery(&r->vec[i], a, &v->vec[i]); /* the same poly a multiplies every entry of v */
+    }
+}
+
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyveck_chknorm
+*
+* Description: Check infinity norm of polynomials in vector of length K.
+*              Assumes input polyveck to be reduced by PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce().
+*
+* Arguments:   - const polyveck *v: pointer to vector
+*              - int32_t bound: norm bound
+*
+* Returns 0 if the norm of every polynomial is strictly smaller than bound <= (Q-1)/8
+* and 1 otherwise.
+**************************************************/
+int PQCLEAN_DILITHIUM5_CLEAN_polyveck_chknorm(const polyveck *v, int32_t bound) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        if (PQCLEAN_DILITHIUM5_CLEAN_poly_chknorm(&v->vec[i], bound)) {
+            return 1; /* stop at the first polynomial that fails the check */
+        }
+    }
+
+    return 0;
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyveck_power2round
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute a0, a1 such that a mod^+ Q = a1*2^D + a0
+*              with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be
+*              standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM5_CLEAN_poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyveck_decompose
+*
+* Description: For all coefficients a of polynomials in vector of length K,
+*              compute high and low bits a1, a0 such that a mod^+ Q = a1*ALPHA + a0
+*              with -ALPHA/2 < a0 <= ALPHA/2, except when a1 = (Q-1)/ALPHA, where we
+*              set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0.
+*              Assumes coefficients to be standard representatives.
+*
+* Arguments:   - polyveck *v1: pointer to output vector of polynomials with
+*                              coefficients a1
+*              - polyveck *v0: pointer to output vector of polynomials with
+*                              coefficients a0
+*              - const polyveck *v: pointer to input vector
+**************************************************/
+void PQCLEAN_DILITHIUM5_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) {
+    unsigned int i;
+
+    for (i = 0; i < K; ++i) {
+        PQCLEAN_DILITHIUM5_CLEAN_poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]);
+    }
+}
+
+/*************************************************
+* Name:        PQCLEAN_DILITHIUM5_CLEAN_polyveck_make_hint
+*
+* Description: Compute hint vector.
+*
+* Arguments:   - polyveck *h: pointer to output vector
+*              - const polyveck *v0: pointer to low part of input vector
+*              - const polyveck *v1: pointer to high part of input vector
+*
+* Returns number of 1 bits.
+**************************************************/ +unsigned int PQCLEAN_DILITHIUM5_CLEAN_polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1) { + unsigned int i, s = 0; + + for (i = 0; i < K; ++i) { + s += PQCLEAN_DILITHIUM5_CLEAN_poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); + } + + return s; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_polyveck_use_hint +* +* Description: Use hint vector to correct the high bits of input vector. +* +* Arguments: - polyveck *w: pointer to output vector of polynomials with +* corrected high bits +* - const polyveck *u: pointer to input vector +* - const polyveck *h: pointer to input hint vector +**************************************************/ +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); + } +} + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1) { + unsigned int i; + + for (i = 0; i < K; ++i) { + PQCLEAN_DILITHIUM5_CLEAN_polyw1_pack(&r[i * POLYW1_PACKEDBYTES], &w1->vec[i]); + } +} diff --git a/crypto_sign/dilithium/dilithium5/clean/polyvec.h b/crypto_sign/dilithium/dilithium5/clean/polyvec.h new file mode 100644 index 00000000..9d564f67 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/polyvec.h @@ -0,0 +1,68 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_POLYVEC_H +#define PQCLEAN_DILITHIUM5_CLEAN_POLYVEC_H +#include "params.h" +#include "poly.h" +#include + +/* Vectors of polynomials of length L */ +typedef struct { + poly vec[L]; +} polyvecl; + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[CRHBYTES], uint16_t nonce); + +void 
PQCLEAN_DILITHIUM5_CLEAN_polyvecl_reduce(polyvecl *v); + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_freeze(polyvecl *v); + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); + +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt(polyvecl *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_invntt_tomont(polyvecl *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_poly_montgomery(polyvecl *r, const poly *a, const polyvecl *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_acc_montgomery(poly *w, + const polyvecl *u, + const polyvecl *v); + + +int PQCLEAN_DILITHIUM5_CLEAN_polyvecl_chknorm(const polyvecl *v, int32_t B); + + + +/* Vectors of polynomials of length K */ +typedef struct { + poly vec[K]; +} polyveck; + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_uniform_eta(polyveck *v, const uint8_t seed[SEEDBYTES], uint16_t nonce); + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_caddq(polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_freeze(polyveck *v); + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_shiftl(polyveck *v); + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_ntt(polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_pointwise_poly_montgomery(polyveck *r, const poly *a, const polyveck *v); + +int PQCLEAN_DILITHIUM5_CLEAN_polyveck_chknorm(const polyveck *v, int32_t B); + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); +unsigned int PQCLEAN_DILITHIUM5_CLEAN_polyveck_make_hint(polyveck *h, + const polyveck *v0, + const polyveck *v1); +void 
PQCLEAN_DILITHIUM5_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); + +void PQCLEAN_DILITHIUM5_CLEAN_polyveck_pack_w1(uint8_t r[K * POLYW1_PACKEDBYTES], const polyveck *w1); + +void PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_expand(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); + +void PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_pointwise_montgomery(polyveck *t, const polyvecl mat[K], const polyvecl *v); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/reduce.c b/crypto_sign/dilithium/dilithium5/clean/reduce.c new file mode 100644 index 00000000..ded7c28b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/reduce.c @@ -0,0 +1,69 @@ +#include "params.h" +#include "reduce.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce +* +* Description: For finite field element a with -2^{31}Q <= a <= Q*2^31, +* compute r \equiv a*2^{-32} (mod Q) such that -Q < r < Q. +* +* Arguments: - int64_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce(int64_t a) { + int32_t t; + + t = (int32_t)((uint64_t)a * (uint64_t)QINV); + t = (a - (int64_t)t * Q) >> 32; + return t; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_reduce32 +* +* Description: For finite field element a with a <= 2^{31} - 2^{22} - 1, +* compute r \equiv a (mod Q) such that -6283009 <= r <= 6283007. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_reduce32(int32_t a) { + int32_t t; + + t = (a + (1 << 22)) >> 23; + t = a - t * Q; + return t; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_caddq +* +* Description: Add Q if input coefficient is negative. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_caddq(int32_t a) { + a += (a >> 31) & Q; + return a; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_freeze +* +* Description: For finite field element a, compute standard +* representative r = a mod^+ Q. +* +* Arguments: - int32_t: finite field element a +* +* Returns r. +**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_freeze(int32_t a) { + a = PQCLEAN_DILITHIUM5_CLEAN_reduce32(a); + a = PQCLEAN_DILITHIUM5_CLEAN_caddq(a); + return a; +} diff --git a/crypto_sign/dilithium/dilithium5/clean/reduce.h b/crypto_sign/dilithium/dilithium5/clean/reduce.h new file mode 100644 index 00000000..4448149a --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/reduce.h @@ -0,0 +1,17 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_REDUCE_H +#define PQCLEAN_DILITHIUM5_CLEAN_REDUCE_H +#include "params.h" +#include + +#define MONT (-4186625) // 2^32 % Q +#define QINV 58728449 // q^(-1) mod 2^32 + +int32_t PQCLEAN_DILITHIUM5_CLEAN_montgomery_reduce(int64_t a); + +int32_t PQCLEAN_DILITHIUM5_CLEAN_reduce32(int32_t a); + +int32_t PQCLEAN_DILITHIUM5_CLEAN_caddq(int32_t a); + +int32_t PQCLEAN_DILITHIUM5_CLEAN_freeze(int32_t a); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/rounding.c b/crypto_sign/dilithium/dilithium5/clean/rounding.c new file mode 100644 index 00000000..7a8e3b01 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/rounding.c @@ -0,0 +1,92 @@ +#include "params.h" +#include "rounding.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_power2round +* +* Description: For finite field element a, compute a0, a1 such that +* a mod^+ Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. +* Assumes a to be standard representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_power2round(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + (1 << (D - 1)) - 1) >> D; + *a0 = a - (a1 << D); + return a1; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_decompose +* +* Description: For finite field element a, compute high and low bits a0, a1 such +* that a mod^+ Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except +* if a1 = (Q-1)/ALPHA where we set a1 = 0 and +* -ALPHA/2 <= a0 = a mod^+ Q - Q < 0. Assumes a to be standard +* representative. +* +* Arguments: - int32_t a: input element +* - int32_t *a0: pointer to output element a0 +* +* Returns a1. +**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_decompose(int32_t *a0, int32_t a) { + int32_t a1; + + a1 = (a + 127) >> 7; + a1 = (a1 * 1025 + (1 << 21)) >> 22; + a1 &= 15; + + *a0 = a - a1 * 2 * GAMMA2; + *a0 -= (((Q - 1) / 2 - *a0) >> 31) & Q; + return a1; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_make_hint +* +* Description: Compute hint bit indicating whether the low bits of the +* input element overflow into the high bits. +* +* Arguments: - int32_t a0: low bits of input element +* - int32_t a1: high bits of input element +* +* Returns 1 if overflow. +**************************************************/ +unsigned int PQCLEAN_DILITHIUM5_CLEAN_make_hint(int32_t a0, int32_t a1) { + if (a0 > GAMMA2 || a0 < -GAMMA2 || (a0 == -GAMMA2 && a1 != 0)) { + return 1; + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_use_hint +* +* Description: Correct high bits according to hint. +* +* Arguments: - int32_t a: input element +* - unsigned int hint: hint bit +* +* Returns corrected high bits. 
+**************************************************/ +int32_t PQCLEAN_DILITHIUM5_CLEAN_use_hint(int32_t a, unsigned int hint) { + int32_t a0, a1; + + a1 = PQCLEAN_DILITHIUM5_CLEAN_decompose(&a0, a); + if (hint == 0) { + return a1; + } + + if (a0 > 0) { + return (a1 + 1) & 15; + } + return (a1 - 1) & 15; +} diff --git a/crypto_sign/dilithium/dilithium5/clean/rounding.h b/crypto_sign/dilithium/dilithium5/clean/rounding.h new file mode 100644 index 00000000..820e9cd6 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/rounding.h @@ -0,0 +1,14 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_ROUNDING_H +#define PQCLEAN_DILITHIUM5_CLEAN_ROUNDING_H +#include "params.h" +#include + +int32_t PQCLEAN_DILITHIUM5_CLEAN_power2round(int32_t *a0, int32_t a); + +int32_t PQCLEAN_DILITHIUM5_CLEAN_decompose(int32_t *a0, int32_t a); + +unsigned int PQCLEAN_DILITHIUM5_CLEAN_make_hint(int32_t a0, int32_t a1); + +int32_t PQCLEAN_DILITHIUM5_CLEAN_use_hint(int32_t a, unsigned int hint); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/sign.c b/crypto_sign/dilithium/dilithium5/clean/sign.c new file mode 100644 index 00000000..791821b4 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/sign.c @@ -0,0 +1,343 @@ +#include "fips202.h" +#include "packing.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "randombytes.h" +#include "sign.h" +#include "symmetric.h" +#include + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_keypair +* +* Description: Generates public and private key. 
+* +* Arguments: - uint8_t *pk: pointer to output public key (allocated +* array of PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (allocated +* array of PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + uint8_t seedbuf[3 * SEEDBYTES]; + uint8_t tr[CRHBYTES]; + const uint8_t *rho, *rhoprime, *key; + polyvecl mat[K]; + polyvecl s1, s1hat; + polyveck s2, t1, t0; + + /* Get randomness for rho, rhoprime and key */ + randombytes(seedbuf, SEEDBYTES); + shake256(seedbuf, 3 * SEEDBYTES, seedbuf, SEEDBYTES); + rho = seedbuf; + rhoprime = seedbuf + SEEDBYTES; + key = seedbuf + 2 * SEEDBYTES; + + /* Expand matrix */ + PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_expand(mat, rho); + + /* Sample short vectors s1 and s2 */ + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_uniform_eta(&s1, rhoprime, 0); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_uniform_eta(&s2, rhoprime, L); + + /* Matrix-vector multiplication */ + s1hat = s1; + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt(&s1hat); + PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_pointwise_montgomery(&t1, mat, &s1hat); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(&t1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(&t1); + + /* Add error vector s2 */ + PQCLEAN_DILITHIUM5_CLEAN_polyveck_add(&t1, &t1, &s2); + + /* Extract t1 and write public key */ + PQCLEAN_DILITHIUM5_CLEAN_polyveck_caddq(&t1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_power2round(&t1, &t0, &t1); + PQCLEAN_DILITHIUM5_CLEAN_pack_pk(pk, rho, &t1); + + /* Compute CRH(rho, t1) and write secret key */ + crh(tr, pk, PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES); + PQCLEAN_DILITHIUM5_CLEAN_pack_sk(sk, rho, tr, key, &t0, &s1, &s2); + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_signature +* +* Description: Computes signature. 
+* +* Arguments: - uint8_t *sig: pointer to output signature (of length PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES) +* - size_t *siglen: pointer to output length of signature +* - uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_signature(uint8_t *sig, + size_t *siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) { + unsigned int n; + uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; + uint8_t *rho, *tr, *key, *mu, *rhoprime; + uint16_t nonce = 0; + polyvecl mat[K], s1, y, z; + polyveck t0, s2, w1, w0, h; + poly cp; + shake256incctx state; + + rho = seedbuf; + tr = rho + SEEDBYTES; + key = tr + CRHBYTES; + mu = key + SEEDBYTES; + rhoprime = mu + CRHBYTES; + PQCLEAN_DILITHIUM5_CLEAN_unpack_sk(rho, tr, key, &t0, &s1, &s2, sk); + + /* Compute CRH(tr, msg) */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, tr, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + crh(rhoprime, key, SEEDBYTES + CRHBYTES); + + /* Expand matrix and transform vectors */ + PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_expand(mat, rho); + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt(&s1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_ntt(&s2); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_ntt(&t0); + +rej: + /* Sample intermediate vector y */ + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_uniform_gamma1(&y, rhoprime, nonce++); + + /* Matrix-vector multiplication */ + z = y; + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt(&z); + PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(&w1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(&w1); + + /* Decompose w and call the random oracle */ + PQCLEAN_DILITHIUM5_CLEAN_polyveck_caddq(&w1); + 
PQCLEAN_DILITHIUM5_CLEAN_polyveck_decompose(&w1, &w0, &w1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_pack_w1(sig, &w1); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, sig, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + PQCLEAN_DILITHIUM5_CLEAN_poly_challenge(&cp, sig); + PQCLEAN_DILITHIUM5_CLEAN_poly_ntt(&cp); + + /* Compute z, reject if it reveals secret */ + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_pointwise_poly_montgomery(&z, &cp, &s1); + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_invntt_tomont(&z); + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_add(&z, &z, &y); + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_reduce(&z); + if (PQCLEAN_DILITHIUM5_CLEAN_polyvecl_chknorm(&z, GAMMA1 - BETA)) { + goto rej; + } + + /* Check that subtracting cs2 does not change high bits of w and low bits + * do not reveal secret information */ + PQCLEAN_DILITHIUM5_CLEAN_polyveck_pointwise_poly_montgomery(&h, &cp, &s2); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(&h); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_sub(&w0, &w0, &h); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(&w0); + if (PQCLEAN_DILITHIUM5_CLEAN_polyveck_chknorm(&w0, GAMMA2 - BETA)) { + goto rej; + } + + /* Compute hints for w1 */ + PQCLEAN_DILITHIUM5_CLEAN_polyveck_pointwise_poly_montgomery(&h, &cp, &t0); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(&h); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(&h); + if (PQCLEAN_DILITHIUM5_CLEAN_polyveck_chknorm(&h, GAMMA2)) { + goto rej; + } + + PQCLEAN_DILITHIUM5_CLEAN_polyveck_add(&w0, &w0, &h); + n = PQCLEAN_DILITHIUM5_CLEAN_polyveck_make_hint(&h, &w0, &w1); + if (n > OMEGA) { + goto rej; + } + + /* Write signature */ + PQCLEAN_DILITHIUM5_CLEAN_pack_sig(sig, sig, &z, &h); + *siglen = PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_crypto_sign +* +* Description: Compute signed 
message. +* +* Arguments: - uint8_t *sm: pointer to output signed message (allocated +* array with PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES + mlen bytes), +* can be equal to m +* - size_t *smlen: pointer to output length of signed +* message +* - const uint8_t *m: pointer to message to be signed +* - size_t mlen: length of message +* - const uint8_t *sk: pointer to bit-packed secret key +* +* Returns 0 (success) +**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign(uint8_t *sm, + size_t *smlen, + const uint8_t *m, + size_t mlen, + const uint8_t *sk) { + size_t i; + + for (i = 0; i < mlen; ++i) { + sm[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; + } + PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_signature(sm, smlen, sm + PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES, mlen, sk); + *smlen += mlen; + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_verify +* +* Description: Verifies signature. 
+* +* Arguments: - uint8_t *m: pointer to input signature +* - size_t siglen: length of signature +* - const uint8_t *m: pointer to message +* - size_t mlen: length of message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signature could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_verify(const uint8_t *sig, + size_t siglen, + const uint8_t *m, + size_t mlen, + const uint8_t *pk) { + unsigned int i; + uint8_t buf[K * POLYW1_PACKEDBYTES]; + uint8_t rho[SEEDBYTES]; + uint8_t mu[CRHBYTES]; + uint8_t c[SEEDBYTES]; + uint8_t c2[SEEDBYTES]; + poly cp; + polyvecl mat[K], z; + polyveck t1, w1, h; + shake256incctx state; + + if (siglen != PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES) { + return -1; + } + + PQCLEAN_DILITHIUM5_CLEAN_unpack_pk(rho, &t1, pk); + if (PQCLEAN_DILITHIUM5_CLEAN_unpack_sig(c, &z, &h, sig)) { + return -1; + } + if (PQCLEAN_DILITHIUM5_CLEAN_polyvecl_chknorm(&z, GAMMA1 - BETA)) { + return -1; + } + + /* Compute CRH(CRH(rho, t1), msg) */ + crh(mu, pk, PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_PUBLICKEYBYTES); + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(mu, CRHBYTES, &state); + shake256_inc_ctx_release(&state); + + /* Matrix-vector multiplication; compute Az - c2^dt1 */ + PQCLEAN_DILITHIUM5_CLEAN_poly_challenge(&cp, c); + PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_expand(mat, rho); + + PQCLEAN_DILITHIUM5_CLEAN_polyvecl_ntt(&z); + PQCLEAN_DILITHIUM5_CLEAN_polyvec_matrix_pointwise_montgomery(&w1, mat, &z); + + PQCLEAN_DILITHIUM5_CLEAN_poly_ntt(&cp); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_shiftl(&t1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_ntt(&t1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_pointwise_poly_montgomery(&t1, &cp, &t1); + + PQCLEAN_DILITHIUM5_CLEAN_polyveck_sub(&w1, &w1, &t1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_reduce(&w1); + 
PQCLEAN_DILITHIUM5_CLEAN_polyveck_invntt_tomont(&w1); + + /* Reconstruct w1 */ + PQCLEAN_DILITHIUM5_CLEAN_polyveck_caddq(&w1); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_use_hint(&w1, &w1, &h); + PQCLEAN_DILITHIUM5_CLEAN_polyveck_pack_w1(buf, &w1); + + /* Call random oracle and verify PQCLEAN_DILITHIUM5_CLEAN_challenge */ + shake256_inc_init(&state); + shake256_inc_absorb(&state, mu, CRHBYTES); + shake256_inc_absorb(&state, buf, K * POLYW1_PACKEDBYTES); + shake256_inc_finalize(&state); + shake256_inc_squeeze(c2, SEEDBYTES, &state); + shake256_inc_ctx_release(&state); + for (i = 0; i < SEEDBYTES; ++i) { + if (c[i] != c2[i]) { + return -1; + } + } + + return 0; +} + +/************************************************* +* Name: PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_open +* +* Description: Verify signed message. +* +* Arguments: - uint8_t *m: pointer to output message (allocated +* array with smlen bytes), can be equal to sm +* - size_t *mlen: pointer to output length of message +* - const uint8_t *sm: pointer to signed message +* - size_t smlen: length of signed message +* - const uint8_t *pk: pointer to bit-packed public key +* +* Returns 0 if signed message could be verified correctly and -1 otherwise +**************************************************/ +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_open(uint8_t *m, + size_t *mlen, + const uint8_t *sm, + size_t smlen, + const uint8_t *pk) { + size_t i; + + if (smlen < PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES) { + goto badsig; + } + + *mlen = smlen - PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES; + if (PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_verify(sm, PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES, sm + PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES, *mlen, pk)) { + goto badsig; + } else { + /* All good, copy msg, return 0 */ + for (i = 0; i < *mlen; ++i) { + m[i] = sm[PQCLEAN_DILITHIUM5_CLEAN_CRYPTO_BYTES + i]; + } + return 0; + } + +badsig: + /* Signature verification failed */ + *mlen = (size_t) -1; + for (i = 0; i < smlen; ++i) { + m[i] = 0; + } + + return 
-1; +} diff --git a/crypto_sign/dilithium/dilithium5/clean/sign.h b/crypto_sign/dilithium/dilithium5/clean/sign.h new file mode 100644 index 00000000..82c6855b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/sign.h @@ -0,0 +1,29 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_SIGN_H +#define PQCLEAN_DILITHIUM5_CLEAN_SIGN_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include +#include + +void PQCLEAN_DILITHIUM5_CLEAN_challenge(poly *c, const uint8_t seed[SEEDBYTES]); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *pk); + +int PQCLEAN_DILITHIUM5_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + +#endif diff --git a/crypto_sign/dilithium/dilithium5/clean/symmetric-shake.c b/crypto_sign/dilithium/dilithium5/clean/symmetric-shake.c new file mode 100644 index 00000000..23faa5f0 --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/symmetric-shake.c @@ -0,0 +1,26 @@ +#include "fips202.h" +#include "params.h" +#include "symmetric.h" +#include + +void PQCLEAN_DILITHIUM5_CLEAN_dilithium_shake128_stream_init(shake128incctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake128_inc_init(state); + shake128_inc_absorb(state, seed, SEEDBYTES); + shake128_inc_absorb(state, t, 2); + shake128_inc_finalize(state); +} + +void PQCLEAN_DILITHIUM5_CLEAN_dilithium_shake256_stream_init(shake256incctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { + uint8_t t[2]; + t[0] = (uint8_t) 
nonce; + t[1] = (uint8_t) (nonce >> 8); + + shake256_inc_init(state); + shake256_inc_absorb(state, seed, CRHBYTES); + shake256_inc_absorb(state, t, 2); + shake256_inc_finalize(state); +} diff --git a/crypto_sign/dilithium/dilithium5/clean/symmetric.h b/crypto_sign/dilithium/dilithium5/clean/symmetric.h new file mode 100644 index 00000000..92ed263b --- /dev/null +++ b/crypto_sign/dilithium/dilithium5/clean/symmetric.h @@ -0,0 +1,36 @@ +#ifndef PQCLEAN_DILITHIUM5_CLEAN_SYMMETRIC_H +#define PQCLEAN_DILITHIUM5_CLEAN_SYMMETRIC_H +#include "fips202.h" +#include "params.h" +#include + + + +typedef shake128incctx stream128_state; +typedef shake256incctx stream256_state; + +void PQCLEAN_DILITHIUM5_CLEAN_dilithium_shake128_stream_init(shake128incctx *state, + const uint8_t seed[SEEDBYTES], + uint16_t nonce); + +void PQCLEAN_DILITHIUM5_CLEAN_dilithium_shake256_stream_init(shake256incctx *state, + const uint8_t seed[CRHBYTES], + uint16_t nonce); + +#define STREAM128_BLOCKBYTES SHAKE128_RATE +#define STREAM256_BLOCKBYTES SHAKE256_RATE + +#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) +#define stream128_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM5_CLEAN_dilithium_shake128_stream_init(STATE, SEED, NONCE) +#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake128_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE128_RATE), STATE) +#define stream128_release(STATE) shake128_inc_ctx_release(STATE) +#define stream256_init(STATE, SEED, NONCE) \ + PQCLEAN_DILITHIUM5_CLEAN_dilithium_shake256_stream_init(STATE, SEED, NONCE) +#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) \ + shake256_inc_squeeze(OUT, (OUTBLOCKS)*(SHAKE256_RATE), STATE) +#define stream256_release(STATE) shake256_inc_ctx_release(STATE) + + +#endif diff --git a/crypto_sign/falcon/falcon-1024/META.yml b/crypto_sign/falcon/falcon-1024/META.yml new file mode 100644 index 00000000..fb088b45 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/META.yml @@ -0,0 +1,32 @@ +name: Falcon-1024 +type: signature 
+claimed-nist-level: 5 +length-public-key: 1793 +length-secret-key: 2305 +length-signature: 1330 +nistkat-sha256: ef2104e326221515621638ca03cd99802271bdd9907e2ae5fc7b8d19d696c584 +testvectors-sha256: 14ee0e3f0ea4b9b25193a54eed9100b1bb1cf5dbc7813fd9dc9180c1ea1a1042 +principal-submitters: + - Thomas Prest +auxiliary-submitters: + - Pierre-Alain Fouque + - Jeffrey Hoffstein + - Paul Kirchner + - Vadim Lyubashevsky + - Thomas Pornin + - Thomas Ricosset + - Gregor Seiler + - William Whyte + - Zhenfei Zhang +implementations: + - name: clean + version: supercop-20201018 via https://github.com/jschanck/package-pqclean/tree/78831f03/falcon + - name: avx2 + version: supercop-20201018 via https://github.com/jschanck/package-pqclean/tree/78831f03/falcon + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - avx2 diff --git a/crypto_sign/falcon/falcon-1024/avx2/LICENSE b/crypto_sign/falcon/falcon-1024/avx2/LICENSE new file mode 100644 index 00000000..12c7b56c --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/LICENSE @@ -0,0 +1,24 @@ +\ +MIT License + +Copyright (c) 2017-2019 Falcon Project + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/crypto_sign/falcon/falcon-1024/avx2/api.h b/crypto_sign/falcon/falcon-1024/avx2/api.h new file mode 100644 index 00000000..b92c7c2c --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/api.h @@ -0,0 +1,80 @@ +#ifndef PQCLEAN_FALCON1024_AVX2_API_H +#define PQCLEAN_FALCON1024_AVX2_API_H + +#include +#include + +#define PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES 2305 +#define PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES 1793 +#define PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES 1330 + +#define PQCLEAN_FALCON1024_AVX2_CRYPTO_ALGNAME "Falcon-1024" + +/* + * Generate a new key pair. Public key goes into pk[], private key in sk[]. + * Key sizes are exact (in bytes): + * public (pk): PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES + * private (sk): PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/* + * Compute a signature on a provided message (m, mlen), with a given + * private key (sk). Signature is written in sig[], with length written + * into *siglen. Signature length is variable; maximum signature length + * (in bytes) is PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES. + * + * sig[], m[] and sk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Verify a signature (sig, siglen) on a message (m, mlen) with a given + * public key (pk). + * + * sig[], m[] and pk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. 
+ */ +int PQCLEAN_FALCON1024_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/* + * Compute a signature on a message and pack the signature and message + * into a single object, written into sm[]. The length of that output is + * written in *smlen; that length may be larger than the message length + * (mlen) by up to PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES. + * + * sm[] and m[] may overlap each other arbitrarily; however, sm[] shall + * not overlap with sk[]. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Open a signed message object (sm, smlen) and verify the signature; + * on success, the message itself is written into m[] and its length + * into *mlen. The message is shorter than the signed message object, + * but the size difference depends on the signature value; the difference + * may range up to PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES. + * + * m[], sm[] and pk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/falcon/falcon-1024/avx2/codec.c b/crypto_sign/falcon/falcon-1024/avx2/codec.c new file mode 100644 index 00000000..3f47ed37 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/codec.c @@ -0,0 +1,555 @@ +#include "inner.h" + +/* + * Encoding/decoding of keys and signatures. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/*
 * see inner.h
 *
 * Encode n = 2^logn integers modulo q = 12289 as consecutive 14-bit
 * big-endian fields.
 *
 * Returns the encoded length in bytes, ((14*n)+7)/8. If out is NULL only
 * that length is computed. Returns 0 if any x[u] >= 12289 or if the
 * encoding does not fit in max_out_len bytes.
 */
size_t
PQCLEAN_FALCON1024_AVX2_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    size_t n, out_len, u;
    uint8_t *buf;
    uint32_t acc;
    int acc_len;

    n = (size_t)1 << logn;
    for (u = 0; u < n; u ++) {
        if (x[u] >= 12289) {
            return 0;
        }
    }
    out_len = ((n * 14) + 7) >> 3;
    if (out == NULL) {
        return out_len;
    }
    if (out_len > max_out_len) {
        return 0;
    }
    buf = out;
    acc = 0;
    acc_len = 0;
    for (u = 0; u < n; u ++) {
        acc = (acc << 14) | x[u];
        acc_len += 14;
        while (acc_len >= 8) {
            acc_len -= 8;
            *buf ++ = (uint8_t)(acc >> acc_len);
        }
    }
    if (acc_len > 0) {
        /* Left-align the remaining bits; the padding bits are zero. */
        *buf = (uint8_t)(acc << (8 - acc_len));
    }
    return out_len;
}

/*
 * see inner.h
 *
 * Decode n = 2^logn 14-bit fields into x[]. Returns the number of bytes
 * consumed, ((14*n)+7)/8, or 0 if the input is shorter than that, if a
 * field is >= 12289, or if the padding bits of the last byte are not zero.
 */
size_t
PQCLEAN_FALCON1024_AVX2_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    size_t n, in_len, u;
    const uint8_t *buf;
    uint32_t acc;
    int acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * 14) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    acc = 0;
    acc_len = 0;
    u = 0;
    while (u < n) {
        acc = (acc << 8) | (*buf ++);
        acc_len += 8;
        if (acc_len >= 14) {
            unsigned w;

            acc_len -= 14;
            w = (acc >> acc_len) & 0x3FFF;
            if (w >= 12289) {
                return 0;
            }
            x[u ++] = (uint16_t)w;
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }
    return in_len;
}

/*
 * see inner.h
 *
 * Encode n = 2^logn signed values as fixed-width two's complement fields
 * of 'bits' bits each (1 <= bits <= 16). Every value must lie in
 * -(2^(bits-1)-1) .. +(2^(bits-1)-1); the value -2^(bits-1) is not
 * representable.
 *
 * Returns the encoded length in bytes (length only if out is NULL), or 0
 * on an out-of-range value, an unsupported 'bits', or a too-small buffer.
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    size_t n, u, out_len;
    int minv, maxv;
    uint8_t *buf;
    uint32_t acc, mask;
    unsigned acc_len;

    /* Shifts below are undefined for bits == 0 and lossy for bits > 16. */
    if (bits == 0 || bits > 16) {
        return 0;
    }
    n = (size_t)1 << logn;
    maxv = (1 << (bits - 1)) - 1;
    minv = -maxv;
    for (u = 0; u < n; u ++) {
        if (x[u] < minv || x[u] > maxv) {
            return 0;
        }
    }
    out_len = ((n * bits) + 7) >> 3;
    if (out == NULL) {
        return out_len;
    }
    if (out_len > max_out_len) {
        return 0;
    }
    buf = out;
    acc = 0;
    acc_len = 0;
    mask = ((uint32_t)1 << bits) - 1;
    for (u = 0; u < n; u ++) {
        acc = (acc << bits) | ((uint16_t)x[u] & mask);
        acc_len += bits;
        while (acc_len >= 8) {
            acc_len -= 8;
            *buf ++ = (uint8_t)(acc >> acc_len);
        }
    }
    if (acc_len > 0) {
        *buf ++ = (uint8_t)(acc << (8 - acc_len));
    }
    return out_len;
}

/*
 * see inner.h
 *
 * Decode n = 2^logn fixed-width fields of 'bits' bits (1 <= bits <= 16)
 * into signed values. Returns the number of bytes consumed, or 0 if the
 * input is too short, 'bits' is unsupported, a field holds the forbidden
 * value -2^(bits-1), or the padding bits of the last byte are not zero.
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    if (bits == 0 || bits > 16) {
        return 0;
    }
    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;
    mask2 = (uint32_t)1 << (bits - 1);
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            /*
             * Sign-extend arithmetically: a field with its top bit set
             * represents w - 2^bits. No pointer cast is needed.
             */
            x[u ++] = (int16_t)((int32_t)w - (int32_t)((w & mask2) << 1));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}

/*
 * see inner.h
 *
 * Same as trim_i16_encode(), for int8_t inputs (1 <= bits <= 8).
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    size_t n, u, out_len;
    int minv, maxv;
    uint8_t *buf;
    uint32_t acc, mask;
    unsigned acc_len;

    if (bits == 0 || bits > 8) {
        return 0;
    }
    n = (size_t)1 << logn;
    maxv = (1 << (bits - 1)) - 1;
    minv = -maxv;
    for (u = 0; u < n; u ++) {
        if (x[u] < minv || x[u] > maxv) {
            return 0;
        }
    }
    out_len = ((n * bits) + 7) >> 3;
    if (out == NULL) {
        return out_len;
    }
    if (out_len > max_out_len) {
        return 0;
    }
    buf = out;
    acc = 0;
    acc_len = 0;
    mask = ((uint32_t)1 << bits) - 1;
    for (u = 0; u < n; u ++) {
        acc = (acc << bits) | ((uint8_t)x[u] & mask);
        acc_len += bits;
        while (acc_len >= 8) {
            acc_len -= 8;
            *buf ++ = (uint8_t)(acc >> acc_len);
        }
    }
    if (acc_len > 0) {
        *buf ++ = (uint8_t)(acc << (8 - acc_len));
    }
    return out_len;
}

/*
 * see inner.h
 *
 * Same as trim_i16_decode(), for int8_t outputs (1 <= bits <= 8).
 */
size_t
PQCLEAN_FALCON1024_AVX2_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    if (bits == 0 || bits > 8) {
        return 0;
    }
    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;
    mask2 = (uint32_t)1 << (bits - 1);
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            x[u ++] = (int8_t)((int32_t)w - (int32_t)((w & mask2) << 1));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}

/*
 * see inner.h
 *
 * Compressed encoding of n = 2^logn values in -2047..+2047. Each value is
 * written as: one sign bit, the low 7 bits of the absolute value, then
 * the high bits of the absolute value in unary (that many zeros followed
 * by a one).
 *
 * Returns the encoded length in bytes, or 0 if a value is out of range or
 * the output does not fit in max_out_len. If out is NULL, nothing is
 * written and only the length is computed.
 */
size_t
PQCLEAN_FALCON1024_AVX2_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = out;

    /*
     * Make sure that all values are within the -2047..+2047 range.
     */
    for (u = 0; u < n; u ++) {
        if (x[u] < -2047 || x[u] > +2047) {
            return 0;
        }
    }

    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        int t;
        unsigned w;

        /*
         * Get sign and absolute value of next integer; push the
         * sign bit.
         */
        acc <<= 1;
        t = x[u];
        if (t < 0) {
            t = -t;
            acc |= 1;
        }
        w = (unsigned)t;

        /*
         * Push the low 7 bits of the absolute value.
         */
        acc <<= 7;
        acc |= w & 127u;
        w >>= 7;

        /*
         * We pushed exactly 8 bits.
         */
        acc_len += 8;

        /*
         * Push as many zeros as necessary, then a one. Since the
         * absolute value is at most 2047, w can only range up to
         * 15 at this point, thus we will add at most 16 bits
         * here. With the 8 bits above and possibly up to 7 bits
         * from previous iterations, we may go up to 31 bits, which
         * will fit in the accumulator, which is an uint32_t.
         */
        acc <<= (w + 1);
        acc |= 1;
        acc_len += w + 1;

        /*
         * Produce all full bytes.
         */
        while (acc_len >= 8) {
            acc_len -= 8;
            if (buf != NULL) {
                if (v >= max_out_len) {
                    return 0;
                }
                buf[v] = (uint8_t)(acc >> acc_len);
            }
            v ++;
        }
    }

    /*
     * Flush remaining bits (if any).
     */
    if (acc_len > 0) {
        if (buf != NULL) {
            if (v >= max_out_len) {
                return 0;
            }
            buf[v] = (uint8_t)(acc << (8 - acc_len));
        }
        v ++;
    }

    return v;
}

/*
 * see inner.h
 *
 * Decode n = 2^logn values written by comp_encode(). Returns the number
 * of bytes consumed, or 0 on error. Only canonical encodings are
 * accepted, so that a given vector has exactly one valid byte string:
 *   - the input must not be truncated;
 *   - absolute values above 2047 are rejected;
 *   - "minus zero" (sign bit set, magnitude 0) is rejected;
 *   - unused bits of the last consumed byte must be zero.
 * comp_encode() never produces the last two forms, so honest encodings
 * still decode.
 */
size_t
PQCLEAN_FALCON1024_AVX2_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: it would be a second encoding of zero.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}
+ * Hence, we enforce at key generation time maximum values for the + * elements of f, g, F and G, so that their encoding can be expressed + * in fixed-width values. Limits have been chosen so that generated + * keys are almost always within bounds, thus impacting neither + * security nor performance. + * + * IMPORTANT: the code assumes that all coefficients of f, g, F and G + * ultimately fit in the -127..+127 range. Thus, none of the elements + * of max_fg_bits[] and max_FG_bits[] shall be greater than 8. + */ + +const uint8_t PQCLEAN_FALCON1024_AVX2_max_fg_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 7, + 7, + 6, + 6, + 5 +}; + +const uint8_t PQCLEAN_FALCON1024_AVX2_max_FG_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8 +}; + +/* + * When generating a new key pair, we can always reject keys which + * feature an abnormally large coefficient. This can also be done for + * signatures, albeit with some care: in case the signature process is + * used in a derandomized setup (explicitly seeded with the message and + * private key), we have to follow the specification faithfully, and the + * specification only enforces a limit on the L2 norm of the signature + * vector. The limit on the L2 norm implies that the absolute value of + * a coefficient of the signature cannot be more than the following: + * + * log(n) n max sig coeff (theoretical) + * 1 2 412 + * 2 4 583 + * 3 8 824 + * 4 16 1166 + * 5 32 1649 + * 6 64 2332 + * 7 128 3299 + * 8 256 4665 + * 9 512 6598 + * 10 1024 9331 + * + * However, the largest observed signature coefficient during our + * experiments was 1077 (in absolute value), hence we can assume that, + * with overwhelming probability, signature coefficients will fit + * in -2047..2047, i.e. 12 bits. 
+ */ + +const uint8_t PQCLEAN_FALCON1024_AVX2_max_sig_bits[] = { + 0, /* unused */ + 10, + 11, + 11, + 12, + 12, + 12, + 12, + 12, + 12, + 12 +}; diff --git a/crypto_sign/falcon/falcon-1024/avx2/common.c b/crypto_sign/falcon/falcon-1024/avx2/common.c new file mode 100644 index 00000000..c8a9066f --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/common.c @@ -0,0 +1,294 @@ +#include "inner.h" + +/* + * Support functions for signatures (hash-to-point, norm). + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h -- extracts 16-bit big-endian words from sc, discards those >= 61445, and stores 2^logn kept words reduced modulo 12289 into x[] */ +void +PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime( + inner_shake256_context *sc, + uint16_t *x, unsigned logn) { + /* + * This is the straightforward per-the-spec implementation. It + * is not constant-time, thus it might reveal information on the + * plaintext (at least, enough to check the plaintext against a + * list of potential plaintexts) in a scenario where the + * attacker does not have access to the signature value or to + * the public key, but knows the nonce (without knowledge of the + * nonce, the hashed output cannot be matched against potential + * plaintexts). + */ + size_t n; + + n = (size_t)1 << logn; + while (n > 0) { + uint8_t buf[2]; + uint32_t w; + + inner_shake256_extract(sc, (void *)buf, sizeof buf); + w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; + if (w < 61445) { + while (w >= 12289) { + w -= 12289; + } + *x ++ = (uint16_t)w; + n --; + } + } +} + +/* see inner.h -- same sampling rule as the vartime variant, but always extracts a fixed number of words (2^logn + overtab[logn]) and compacts the kept ones with masked swaps instead of branching on the sampled values; tmp[] is used as uint16_t scratch space */ +void +PQCLEAN_FALCON1024_AVX2_hash_to_point_ct( + inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp) { + /* + * Each 16-bit sample is a value in 0..65535. The value is + * kept if it falls in 0..61444 (because 61445 = 5*12289) + * and rejected otherwise; thus, each sample has probability + * about 0.93758 of being selected. + * + * We want to oversample enough to be sure that we will + * have enough values with probability at least 1 - 2^(-256). + * Depending on degree N, this leads to the following + * required oversampling: + * + * logn n oversampling + * 1 2 65 + * 2 4 67 + * 3 8 71 + * 4 16 77 + * 5 32 86 + * 6 64 100 + * 7 128 122 + * 8 256 154 + * 9 512 205 + * 10 1024 287 + * + * If logn >= 7, then the provided temporary buffer is large + * enough. Otherwise, we use a stack buffer of 63 entries + * (i.e. 126 bytes) for the values that do not fit in tmp[]. 
+ */ + + static const uint16_t overtab[] = { + 0, /* unused */ + 65, + 67, + 71, + 77, + 86, + 100, + 122, + 154, + 205, + 287 + }; + + unsigned n, n2, u, m, p, over; + uint16_t *tt1, tt2[63]; + + /* + * We first generate m 16-bit values. Values 0..n-1 go to x[]. + * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. + * We also reduce modulo q the values; rejected values are set + * to 0xFFFF. + */ + n = 1U << logn; + n2 = n << 1; + over = overtab[logn]; + m = n + over; + tt1 = (uint16_t *)tmp; + for (u = 0; u < m; u ++) { + uint8_t buf[2]; + uint32_t w, wr; + + inner_shake256_extract(sc, buf, sizeof buf); + w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; + wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); /* branch-free: subtract 2*12289 when w >= 24578 (mask is all-ones then, zero otherwise) */ + wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); /* second conditional subtraction of 2*12289 */ + wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); /* final conditional subtraction of 12289 */ + wr |= ((w - 61445) >> 31) - 1; /* all-ones when w >= 61445, i.e. 0xFFFF once truncated to 16 bits */ + if (u < n) { + x[u] = (uint16_t)wr; + } else if (u < n2) { + tt1[u - n] = (uint16_t)wr; + } else { + tt2[u - n2] = (uint16_t)wr; + } + } + + /* + * Now we must "squeeze out" the invalid values. We do this in + * a logarithmic sequence of passes; each pass computes where a + * value should go, and moves it down by 'p' slots if necessary, + * where 'p' uses an increasing powers-of-two scale. It can be + * shown that in all cases where the loop decides that a value + * has to be moved down by p slots, the destination slot is + * "free" (i.e. contains an invalid value). + */ + for (p = 1; p <= over; p <<= 1) { + unsigned v; + + /* + * In the loop below: + * + * - v contains the index of the final destination of + * the value; it is recomputed dynamically based on + * whether values are valid or not. + * + * - u is the index of the value we consider ("source"); + * its address is s. + * + * - The loop may swap the value with the one at index + * u-p. The address of the swap destination is d. 
+ */ + v = 0; + for (u = 0; u < m; u ++) { + uint16_t *s, *d; + unsigned j, sv, dv, mk; + + if (u < n) { + s = &x[u]; + } else if (u < n2) { + s = &tt1[u - n]; + } else { + s = &tt2[u - n2]; + } + sv = *s; + + /* + * The value in sv should ultimately go to + * address v, i.e. jump back by u-v slots. + */ + j = u - v; + + /* + * We increment v for the next iteration, but + * only if the source value is valid. The mask + * 'mk' is -1 if the value is valid, 0 otherwise, + * so we _subtract_ mk. + */ + mk = (sv >> 15) - 1U; + v -= mk; + + /* + * In this loop we consider jumps by p slots; if + * u < p then there is nothing more to do. + */ + if (u < p) { + continue; + } + + /* + * Destination for the swap: value at address u-p. + */ + if ((u - p) < n) { + d = &x[u - p]; + } else if ((u - p) < n2) { + d = &tt1[(u - p) - n]; + } else { + d = &tt2[(u - p) - n2]; + } + dv = *d; + + /* + * The swap should be performed only if the source + * is valid AND the jump j has its 'p' bit set. + */ + mk &= -(((j & p) + 0x1FF) >> 9); + + *s = (uint16_t)(sv ^ (mk & (sv ^ dv))); + *d = (uint16_t)(dv ^ (mk & (sv ^ dv))); + } + } +} + +/* see inner.h -- returns 1 when the squared l2-norm of (s1, s2) is strictly below the acceptance bound computed at the end of the function, 0 otherwise */ +int +PQCLEAN_FALCON1024_AVX2_is_short( + const int16_t *s1, const int16_t *s2, unsigned logn) { + /* + * We use the l2-norm. Code below uses only 32-bit operations to + * compute the square of the norm with saturation to 2^32-1 if + * the value exceeds 2^31-1. + */ + size_t n, u; + uint32_t s, ng; + + n = (size_t)1 << logn; + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = s1[u]; + s += (uint32_t)(z * z); + ng |= s; + z = s2[u]; + s += (uint32_t)(z * z); + ng |= s; + } + s |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). 
+ */ + return s < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} + +/* see inner.h -- like is_short(), but starts from the squared norm already accumulated in sqn (presumably that of the first vector; confirm against the caller) and adds the squares of s2[]; the running sum saturates to 2^32-1 once any partial sum, or sqn itself, has bit 31 set; returns 1 when the result is strictly below the bound, 0 otherwise */ +int +PQCLEAN_FALCON1024_AVX2_is_short_half( + uint32_t sqn, const int16_t *s2, unsigned logn) { + size_t n, u; + uint32_t ng; + + n = (size_t)1 << logn; + ng = -(sqn >> 31); + for (u = 0; u < n; u ++) { + int32_t z; + + z = s2[u]; + sqn += (uint32_t)(z * z); + ng |= sqn; + } + sqn |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). + */ + return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} diff --git a/crypto_sign/falcon/falcon-1024/avx2/fft.c b/crypto_sign/falcon/falcon-1024/avx2/fft.c new file mode 100644 index 00000000..1398f99b --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/fft.c @@ -0,0 +1,1109 @@ +#include "inner.h" + +/* + * FFT code. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* + * Rules for complex number macros: + * -------------------------------- + * + * Operand order is: destination, source1, source2... + * + * Each operand is a real and an imaginary part. + * + * All overlaps are allowed. + */ + +/* + * Addition of two complex numbers (d = a + b). + */ +#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_add(a_re, b_re); \ + fpct_im = fpr_add(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Subtraction of two complex numbers (d = a - b). + */ +#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_sub(a_re, b_re); \ + fpct_im = fpr_sub(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Multiplication of two complex numbers (d = a * b). + */ +#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Squaring of a complex number (d = a * a). 
+ */ +#define FPC_SQR(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_d_re = fpr_sub(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_d_im = fpr_double(fpr_mul(fpct_a_re, fpct_a_im)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Inversion of a complex number (d = 1 / a). + */ +#define FPC_INV(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_d_re = fpr_mul(fpct_a_re, fpct_m); \ + fpct_d_im = fpr_mul(fpr_neg(fpct_a_im), fpct_m); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Division of complex numbers (d = a / b). + */ +#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_b_re), fpr_sqr(fpct_b_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_b_re = fpr_mul(fpct_b_re, fpct_m); \ + fpct_b_im = fpr_mul(fpr_neg(fpct_b_im), fpct_m); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the + * values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots + * of X^N+1 in the field of complex numbers. A crucial property is that + * w_{N-1-j} = conj(w_j) = 1/w_j for all j. + * + * FFT representation of a polynomial f (taken modulo X^N+1) is the + * set of values f(w_j). 
Since f is real, conj(f(w_j)) = f(conj(w_j)), + * thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, + * for j = 0 to N/2-1; the other half can be recomputed easily when (if) + * needed. A consequence is that FFT representation has the same size + * as normal representation: N/2 complex numbers use N real numbers (each + * complex number is the combination of a real and an imaginary part). + * + * We use a specific ordering which makes computations easier. Let rev() + * be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we + * store the real and imaginary parts of f(w_j) in slots: + * + * Re(f(w_j)) -> slot rev(j)/2 + * Im(f(w_j)) -> slot rev(j)/2+N/2 + * + * (Note that rev(j) is even for j < N/2.) + */ + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_FFT(fpr *f, unsigned logn) { + /* + * FFT algorithm in bit-reversal order uses the following + * iterative algorithm: + * + * t = N + * for m = 1; m < N; m *= 2: + * ht = t/2 + * for i1 = 0; i1 < m; i1 ++: + * j1 = i1 * t + * s = GM[m + i1] + * for j = j1; j < (j1 + ht); j ++: + * x = f[j] + * y = s * f[j + ht] + * f[j] = x + y + * f[j + ht] = x - y + * t = ht + * + * GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). + * + * In the description above, f[] is supposed to contain complex + * numbers. In our in-memory representation, the real and + * imaginary parts of f[k] are in array slots k and k+N/2. + * + * We only keep the first half of the complex numbers. We can + * see that after the first iteration, the first and second halves + * of the array of complex numbers have separate lives, so we + * simply ignore the second part. + */ + + unsigned u; + size_t t, n, hn, m; + + /* + * First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 + * (because GM[1] = w^rev(1) = w^(N/2) = i). + * In our chosen representation, this is a no-op: everything is + * already where it should be. 
+ */ + + /* + * Subsequent iterations are truncated to use only the first + * half of values. + */ + n = (size_t)1 << logn; + hn = n >> 1; + t = hn; + for (u = 1, m = 2; u < logn; u ++, m <<= 1) { + size_t ht, hm, i1, j1; + + ht = t >> 1; + hm = m >> 1; + for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { + size_t j, j2; + + j2 = j1 + ht; + if (ht >= 4) { + __m256d s_re, s_im; + + s_re = _mm256_set1_pd( + fpr_gm_tab[((m + i1) << 1) + 0].v); + s_im = _mm256_set1_pd( + fpr_gm_tab[((m + i1) << 1) + 1].v); + for (j = j1; j < j2; j += 4) { + __m256d x_re, x_im, y_re, y_im; + __m256d z_re, z_im; + + x_re = _mm256_loadu_pd(&f[j].v); + x_im = _mm256_loadu_pd(&f[j + hn].v); + z_re = _mm256_loadu_pd(&f[j + ht].v); + z_im = _mm256_loadu_pd(&f[j + ht + hn].v); + y_re = FMSUB(z_re, s_re, + _mm256_mul_pd(z_im, s_im)); + y_im = FMADD(z_re, s_im, + _mm256_mul_pd(z_im, s_re)); + _mm256_storeu_pd(&f[j].v, + _mm256_add_pd(x_re, y_re)); + _mm256_storeu_pd(&f[j + hn].v, + _mm256_add_pd(x_im, y_im)); + _mm256_storeu_pd(&f[j + ht].v, + _mm256_sub_pd(x_re, y_re)); + _mm256_storeu_pd(&f[j + ht + hn].v, + _mm256_sub_pd(x_im, y_im)); + } + } else { + fpr s_re, s_im; + + s_re = fpr_gm_tab[((m + i1) << 1) + 0]; + s_im = fpr_gm_tab[((m + i1) << 1) + 1]; + for (j = j1; j < j2; j ++) { + fpr x_re, x_im, y_re, y_im; + + x_re = f[j]; + x_im = f[j + hn]; + y_re = f[j + ht]; + y_im = f[j + ht + hn]; + FPC_MUL(y_re, y_im, + y_re, y_im, s_re, s_im); + FPC_ADD(f[j], f[j + hn], + x_re, x_im, y_re, y_im); + FPC_SUB(f[j + ht], f[j + ht + hn], + x_re, x_im, y_re, y_im); + } + } + } + t = ht; + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_iFFT(fpr *f, unsigned logn) { + /* + * Inverse FFT algorithm in bit-reversal order uses the following + * iterative algorithm: + * + * t = 1 + * for m = N; m > 1; m /= 2: + * hm = m/2 + * dt = t*2 + * for i1 = 0; i1 < hm; i1 ++: + * j1 = i1 * dt + * s = iGM[hm + i1] + * for j = j1; j < (j1 + t); j ++: + * x = f[j] + * y = f[j + t] + * f[j] = x + y + * f[j + t] = s 
* (x - y) + * t = dt + * for i1 = 0; i1 < N; i1 ++: + * f[i1] = f[i1] / N + * + * iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) + * (actually, iGM[k] = 1/GM[k] = conj(GM[k])). + * + * In the main loop (not counting the final division loop), in + * all iterations except the last, the first and second half of f[] + * (as an array of complex numbers) are separate. In our chosen + * representation, we do not keep the second half. + * + * The last iteration recombines the recomputed half with the + * implicit half, and should yield only real numbers since the + * target polynomial is real; moreover, s = i at that step. + * Thus, when considering x and y: + * y = conj(x) since the final f[j] must be real + * Therefore, f[j] is filled with 2*Re(x), and f[j + t] is + * filled with 2*Im(x). + * But we already have Re(x) and Im(x) in array slots j and j+t + * in our chosen representation. That last iteration is thus a + * simple doubling of the values in all the array. + * + * We make the last iteration a no-op by tweaking the final + * division into a division by N/2, not N. 
+ */ + size_t u, n, hn, t, m; + + n = (size_t)1 << logn; + t = 1; + m = n; + hn = n >> 1; + for (u = logn; u > 1; u --) { + size_t hm, dt, i1, j1; + + hm = m >> 1; + dt = t << 1; + for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { + size_t j, j2; + + j2 = j1 + t; + if (t >= 4) { + __m256d s_re, s_im; + + s_re = _mm256_set1_pd( + fpr_gm_tab[((hm + i1) << 1) + 0].v); + s_im = _mm256_set1_pd( + fpr_gm_tab[((hm + i1) << 1) + 1].v); + for (j = j1; j < j2; j += 4) { + __m256d x_re, x_im, y_re, y_im; + __m256d z_re, z_im; + + x_re = _mm256_loadu_pd(&f[j].v); + x_im = _mm256_loadu_pd(&f[j + hn].v); + y_re = _mm256_loadu_pd(&f[j + t].v); + y_im = _mm256_loadu_pd(&f[j + t + hn].v); + _mm256_storeu_pd(&f[j].v, + _mm256_add_pd(x_re, y_re)); + _mm256_storeu_pd(&f[j + hn].v, + _mm256_add_pd(x_im, y_im)); + x_re = _mm256_sub_pd(y_re, x_re); + x_im = _mm256_sub_pd(x_im, y_im); + z_re = FMSUB(x_im, s_im, + _mm256_mul_pd(x_re, s_re)); + z_im = FMADD(x_re, s_im, + _mm256_mul_pd(x_im, s_re)); + _mm256_storeu_pd(&f[j + t].v, z_re); + _mm256_storeu_pd(&f[j + t + hn].v, z_im); + } + } else { + fpr s_re, s_im; + + s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; + s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); + for (j = j1; j < j2; j ++) { + fpr x_re, x_im, y_re, y_im; + + x_re = f[j]; + x_im = f[j + hn]; + y_re = f[j + t]; + y_im = f[j + t + hn]; + FPC_ADD(f[j], f[j + hn], + x_re, x_im, y_re, y_im); + FPC_SUB(x_re, x_im, + x_re, x_im, y_re, y_im); + FPC_MUL(f[j + t], f[j + t + hn], + x_re, x_im, s_re, s_im); + } + } + } + t = dt; + m = hm; + } + + /* + * Last iteration is a no-op, provided that we divide by N/2 + * instead of N. We need to make a special case for logn = 0. 
+ */ + if (logn > 0) { + fpr ni; + + ni = fpr_p2_tab[logn]; + for (u = 0; u < n; u ++) { + f[u] = fpr_mul(f[u], ni); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_add( + fpr *a, const fpr *b, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_add_pd( + _mm256_loadu_pd(&a[u].v), + _mm256_loadu_pd(&b[u].v))); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_add(a[u], b[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_sub( + fpr *a, const fpr *b, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_sub_pd( + _mm256_loadu_pd(&a[u].v), + _mm256_loadu_pd(&b[u].v))); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_sub(a[u], b[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_neg(fpr *a, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + __m256d s; + + s = _mm256_set1_pd(-0.0); + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_xor_pd(_mm256_loadu_pd(&a[u].v), s)); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_neg(a[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_adj_fft(fpr *a, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 8) { + __m256d s; + + s = _mm256_set1_pd(-0.0); + for (u = (n >> 1); u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_xor_pd(_mm256_loadu_pd(&a[u].v), s)); + } + } else { + for (u = (n >> 1); u < n; u ++) { + a[u] = fpr_neg(a[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_mul_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = 
_mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + c_re = FMSUB( + a_re, b_re, _mm256_mul_pd(a_im, b_im)); + c_im = FMADD( + a_re, b_im, _mm256_mul_pd(a_im, b_re)); + _mm256_storeu_pd(&a[u].v, c_re); + _mm256_storeu_pd(&a[u + hn].v, c_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_muladj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = _mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + c_re = FMADD( + a_re, b_re, _mm256_mul_pd(a_im, b_im)); + c_im = FMSUB( + a_im, b_re, _mm256_mul_pd(a_re, b_im)); + _mm256_storeu_pd(&a[u].v, c_re); + _mm256_storeu_pd(&a[u + hn].v, c_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = fpr_neg(b[u + hn]); + FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(fpr *a, unsigned logn) { + /* + * Since each coefficient is multiplied with its own conjugate, + * the result contains only real values. 
+ */ + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d zero; + + zero = _mm256_setzero_pd(); + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + _mm256_storeu_pd(&a[u].v, + FMADD(a_re, a_re, + _mm256_mul_pd(a_im, a_im))); + _mm256_storeu_pd(&a[u + hn].v, zero); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im; + + a_re = a[u]; + a_im = a[u + hn]; + a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); + a[u + hn] = fpr_zero; + } + } +} + +/* see inner.h -- multiplies every one of the n = 2^logn entries of a by the scalar x in place */ +void +PQCLEAN_FALCON1024_AVX2_poly_mulconst(fpr *a, fpr x, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + __m256d x4; + + x4 = _mm256_set1_pd(x.v); + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_mul_pd(x4, _mm256_loadu_pd(&a[u].v))); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_mul(a[u], x); + } + } +} + +/* see inner.h -- in-place complex division a = a / b on n/2 values; the AVX2 path scales b by 1 / (b_re^2 + b_im^2) and then multiplies a by the conjugate of that, the scalar path uses FPC_DIV */ +void +PQCLEAN_FALCON1024_AVX2_poly_div_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im, t; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = _mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + t = _mm256_div_pd(one, + FMADD(b_re, b_re, + _mm256_mul_pd(b_im, b_im))); + b_re = _mm256_mul_pd(b_re, t); + b_im = _mm256_mul_pd(b_im, t); + c_re = FMADD( + a_re, b_re, _mm256_mul_pd(a_im, b_im)); + c_im = FMSUB( + a_im, b_re, _mm256_mul_pd(a_re, b_im)); + _mm256_storeu_pd(&a[u].v, c_re); + _mm256_storeu_pd(&a[u + hn].v, c_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } + } +} + +/* see inner.h -- for each u below n/2 writes d[u] = 1 / (a_re^2 + a_im^2 + b_re^2 + b_im^2) taken from the complex values of a and b at index u; entries d[n/2 ..] are not written */ +void 
+PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, dv; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = _mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + dv = _mm256_div_pd(one, + _mm256_add_pd( + FMADD(a_re, a_re, + _mm256_mul_pd(a_im, a_im)), + FMADD(b_re, b_re, + _mm256_mul_pd(b_im, b_im)))); + _mm256_storeu_pd(&d[u].v, dv); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im; + fpr b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + d[u] = fpr_inv(fpr_add( + fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), + fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); + } + } +} + +/* see inner.h -- writes d = F * conj(f) + G * conj(g), element-wise on n/2 complex values (real part at index u, imaginary part at index u + n/2); the four inputs are only read */ +void +PQCLEAN_FALCON1024_AVX2_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d F_re, F_im, G_re, G_im; + __m256d f_re, f_im, g_re, g_im; + __m256d a_re, a_im, b_re, b_im; + + F_re = _mm256_loadu_pd(&F[u].v); + F_im = _mm256_loadu_pd(&F[u + hn].v); + G_re = _mm256_loadu_pd(&G[u].v); + G_im = _mm256_loadu_pd(&G[u + hn].v); + f_re = _mm256_loadu_pd(&f[u].v); + f_im = _mm256_loadu_pd(&f[u + hn].v); + g_re = _mm256_loadu_pd(&g[u].v); + g_im = _mm256_loadu_pd(&g[u + hn].v); + + a_re = FMADD(F_re, f_re, + _mm256_mul_pd(F_im, f_im)); + a_im = FMSUB(F_im, f_re, + _mm256_mul_pd(F_re, f_im)); + b_re = FMADD(G_re, g_re, + _mm256_mul_pd(G_im, g_im)); + b_im = FMSUB(G_im, g_re, + _mm256_mul_pd(G_re, g_im)); + _mm256_storeu_pd(&d[u].v, + _mm256_add_pd(a_re, b_re)); + _mm256_storeu_pd(&d[u + hn].v, + _mm256_add_pd(a_im, b_im)); + } + } else { + for (u = 0; u < hn; u ++) { + fpr F_re, F_im, G_re, G_im; + fpr 
f_re, f_im, g_re, g_im; + fpr a_re, a_im, b_re, b_im; + + F_re = F[u]; + F_im = F[u + hn]; + G_re = G[u]; + G_im = G[u + hn]; + f_re = f[u]; + f_im = f[u + hn]; + g_re = g[u]; + g_im = g[u + hn]; + + FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); + FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); + d[u] = fpr_add(a_re, b_re); + d[u + hn] = fpr_add(a_im, b_im); + } + } +} + +/* see inner.h -- scales both halves of a in place by b[u] for each u below n/2; only the lower half of b is read, i.e. b is treated as real-valued */ +void +PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, bv; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + bv = _mm256_loadu_pd(&b[u].v); + _mm256_storeu_pd(&a[u].v, + _mm256_mul_pd(a_re, bv)); + _mm256_storeu_pd(&a[u + hn].v, + _mm256_mul_pd(a_im, bv)); + } + } else { + for (u = 0; u < hn; u ++) { + a[u] = fpr_mul(a[u], b[u]); + a[u + hn] = fpr_mul(a[u + hn], b[u]); + } + } +} + +/* see inner.h -- multiplies both halves of a in place by the reciprocal 1 / b[u] for each u below n/2; only the lower half of b is read */ +void +PQCLEAN_FALCON1024_AVX2_poly_div_autoadj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d ib, a_re, a_im; + + ib = _mm256_div_pd(one, _mm256_loadu_pd(&b[u].v)); + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + _mm256_storeu_pd(&a[u].v, _mm256_mul_pd(a_re, ib)); + _mm256_storeu_pd(&a[u + hn].v, _mm256_mul_pd(a_im, ib)); + } + } else { + for (u = 0; u < hn; u ++) { + fpr ib; + + ib = fpr_inv(b[u]); + a[u] = fpr_mul(a[u], ib); + a[u + hn] = fpr_mul(a[u + hn], ib); + } + } +} + +/* see inner.h -- per complex entry, with mu = g01 / g00: g11 is overwritten with g11 - mu * conj(g01) and g01 with conj(mu); g00 is only read. In the AVX2 path the variable mu_im already carries the negated (conjugated) imaginary part */ +void +PQCLEAN_FALCON1024_AVX2_poly_LDL_fft( + const fpr *g00, + fpr *g01, fpr *g11, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d g00_re, g00_im, g01_re, 
g01_im, g11_re, g11_im; + __m256d t, mu_re, mu_im, xi_re, xi_im; + + g00_re = _mm256_loadu_pd(&g00[u].v); + g00_im = _mm256_loadu_pd(&g00[u + hn].v); + g01_re = _mm256_loadu_pd(&g01[u].v); + g01_im = _mm256_loadu_pd(&g01[u + hn].v); + g11_re = _mm256_loadu_pd(&g11[u].v); + g11_im = _mm256_loadu_pd(&g11[u + hn].v); + + t = _mm256_div_pd(one, + FMADD(g00_re, g00_re, + _mm256_mul_pd(g00_im, g00_im))); + g00_re = _mm256_mul_pd(g00_re, t); + g00_im = _mm256_mul_pd(g00_im, t); + mu_re = FMADD(g01_re, g00_re, + _mm256_mul_pd(g01_im, g00_im)); + mu_im = FMSUB(g01_re, g00_im, + _mm256_mul_pd(g01_im, g00_re)); + xi_re = FMSUB(mu_re, g01_re, + _mm256_mul_pd(mu_im, g01_im)); + xi_im = FMADD(mu_im, g01_re, + _mm256_mul_pd(mu_re, g01_im)); + _mm256_storeu_pd(&g11[u].v, + _mm256_sub_pd(g11_re, xi_re)); + _mm256_storeu_pd(&g11[u + hn].v, + _mm256_add_pd(g11_im, xi_im)); + _mm256_storeu_pd(&g01[u].v, mu_re); + _mm256_storeu_pd(&g01[u + hn].v, mu_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + fpr mu_re, mu_im; + + g00_re = g00[u]; + g00_im = g00[u + hn]; + g01_re = g01[u]; + g01_im = g01[u + hn]; + g11_re = g11[u]; + g11_im = g11[u + hn]; + FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); + FPC_MUL(g01_re, g01_im, + mu_re, mu_im, g01_re, fpr_neg(g01_im)); + FPC_SUB(g11[u], g11[u + hn], + g11_re, g11_im, g01_re, g01_im); + g01[u] = mu_re; + g01[u + hn] = fpr_neg(mu_im); + } + } +} + +/* see inner.h -- same arithmetic as poly_LDL_fft but with separate outputs: with mu = g01 / g00 per complex entry, d11 receives g11 - mu * conj(g01) and l10 receives conj(mu); g00, g01 and g11 are only read */ +void +PQCLEAN_FALCON1024_AVX2_poly_LDLmv_fft( + fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + __m256d t, mu_re, mu_im, xi_re, xi_im; + + g00_re = _mm256_loadu_pd(&g00[u].v); + g00_im = _mm256_loadu_pd(&g00[u + hn].v); + g01_re = _mm256_loadu_pd(&g01[u].v); + g01_im = 
_mm256_loadu_pd(&g01[u + hn].v); + g11_re = _mm256_loadu_pd(&g11[u].v); + g11_im = _mm256_loadu_pd(&g11[u + hn].v); + + t = _mm256_div_pd(one, + FMADD(g00_re, g00_re, + _mm256_mul_pd(g00_im, g00_im))); + g00_re = _mm256_mul_pd(g00_re, t); + g00_im = _mm256_mul_pd(g00_im, t); + mu_re = FMADD(g01_re, g00_re, + _mm256_mul_pd(g01_im, g00_im)); + mu_im = FMSUB(g01_re, g00_im, + _mm256_mul_pd(g01_im, g00_re)); + xi_re = FMSUB(mu_re, g01_re, + _mm256_mul_pd(mu_im, g01_im)); + xi_im = FMADD(mu_im, g01_re, + _mm256_mul_pd(mu_re, g01_im)); + _mm256_storeu_pd(&d11[u].v, + _mm256_sub_pd(g11_re, xi_re)); + _mm256_storeu_pd(&d11[u + hn].v, + _mm256_add_pd(g11_im, xi_im)); + _mm256_storeu_pd(&l10[u].v, mu_re); + _mm256_storeu_pd(&l10[u + hn].v, mu_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + fpr mu_re, mu_im; + + g00_re = g00[u]; + g00_im = g00[u + hn]; + g01_re = g01[u]; + g01_im = g01[u + hn]; + g11_re = g11[u]; + g11_im = g11[u + hn]; + FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); + FPC_MUL(g01_re, g01_im, + mu_re, mu_im, g01_re, fpr_neg(g01_im)); + FPC_SUB(d11[u], d11[u + hn], + g11_re, g11_im, g01_re, g01_im); + l10[u] = mu_re; + l10[u + hn] = fpr_neg(mu_im); + } + } +} + +/* see inner.h -- splits f (n entries) into f0 and f1 (n/2 entries each): f0 gets half the sum of each adjacent complex pair of f, f1 gets half their difference multiplied by the conjugate of the matching fpr_gm_tab entry; for n = 2 the pair loop is empty and f[0], f[1] are copied straight to f0[0], f1[0] */ +void +PQCLEAN_FALCON1024_AVX2_poly_split_fft( + fpr *f0, fpr *f1, + const fpr *f, unsigned logn) { + /* + * The FFT representation we use is in bit-reversed order + * (element i contains f(w^(rev(i))), where rev() is the + * bit-reversal function over the ring degree. This changes + * indexes with regards to the Falcon specification. 
+ */ + size_t n, hn, qn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + qn = hn >> 1; + + if (n >= 8) { + __m256d half, sv; + + half = _mm256_set1_pd(0.5); + sv = _mm256_set_pd(-0.0, 0.0, -0.0, 0.0); + for (u = 0; u < qn; u += 2) { + __m256d ab_re, ab_im, ff0, ff1, ff2, ff3, gmt; + + ab_re = _mm256_loadu_pd(&f[(u << 1)].v); + ab_im = _mm256_loadu_pd(&f[(u << 1) + hn].v); + ff0 = _mm256_mul_pd(_mm256_hadd_pd(ab_re, ab_im), half); + ff0 = _mm256_permute4x64_pd(ff0, 0xD8); + _mm_storeu_pd(&f0[u].v, + _mm256_extractf128_pd(ff0, 0)); + _mm_storeu_pd(&f0[u + qn].v, + _mm256_extractf128_pd(ff0, 1)); + + ff1 = _mm256_mul_pd(_mm256_hsub_pd(ab_re, ab_im), half); + gmt = _mm256_loadu_pd(&fpr_gm_tab[(u + hn) << 1].v); + ff2 = _mm256_shuffle_pd(ff1, ff1, 0x5); + ff3 = _mm256_hadd_pd( + _mm256_mul_pd(ff1, gmt), + _mm256_xor_pd(_mm256_mul_pd(ff2, gmt), sv)); + ff3 = _mm256_permute4x64_pd(ff3, 0xD8); + _mm_storeu_pd(&f1[u].v, + _mm256_extractf128_pd(ff3, 0)); + _mm_storeu_pd(&f1[u + qn].v, + _mm256_extractf128_pd(ff3, 1)); + } + } else { + f0[0] = f[0]; + f1[0] = f[hn]; + + for (u = 0; u < qn; u ++) { + fpr a_re, a_im, b_re, b_im; + fpr t_re, t_im; + + a_re = f[(u << 1) + 0]; + a_im = f[(u << 1) + 0 + hn]; + b_re = f[(u << 1) + 1]; + b_im = f[(u << 1) + 1 + hn]; + + FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); + f0[u] = fpr_half(t_re); + f0[u + qn] = fpr_half(t_im); + + FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); + FPC_MUL(t_re, t_im, t_re, t_im, + fpr_gm_tab[((u + hn) << 1) + 0], + fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); + f1[u] = fpr_half(t_re); + f1[u + qn] = fpr_half(t_im); + } + } +} + +/* see inner.h -- builds f (n entries) from f0 and f1 (n/2 entries each): with b = f1[u] times the matching fpr_gm_tab entry, the adjacent complex pair of f at 2u and 2u + 1 receives f0[u] + b and f0[u] - b; this looks like the inverse of poly_split_fft (TODO confirm against inner.h). The AVX2 path handles four values of u per step and needs n >= 16 */ +void +PQCLEAN_FALCON1024_AVX2_poly_merge_fft( + fpr *f, + const fpr *f0, const fpr *f1, unsigned logn) { + size_t n, hn, qn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + qn = hn >> 1; + + if (n >= 16) { + for (u = 0; u < qn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im; + __m256d gm1, gm2, g_re, g_im; + __m256d t_re, t_im, u_re, u_im; + __m256d tu1_re, 
tu2_re, tu1_im, tu2_im; + + a_re = _mm256_loadu_pd(&f0[u].v); + a_im = _mm256_loadu_pd(&f0[u + qn].v); + c_re = _mm256_loadu_pd(&f1[u].v); + c_im = _mm256_loadu_pd(&f1[u + qn].v); + + gm1 = _mm256_loadu_pd(&fpr_gm_tab[(u + hn) << 1].v); + gm2 = _mm256_loadu_pd(&fpr_gm_tab[(u + 2 + hn) << 1].v); + g_re = _mm256_unpacklo_pd(gm1, gm2); + g_im = _mm256_unpackhi_pd(gm1, gm2); + g_re = _mm256_permute4x64_pd(g_re, 0xD8); + g_im = _mm256_permute4x64_pd(g_im, 0xD8); + + b_re = FMSUB( + c_re, g_re, _mm256_mul_pd(c_im, g_im)); + b_im = FMADD( + c_re, g_im, _mm256_mul_pd(c_im, g_re)); + + t_re = _mm256_add_pd(a_re, b_re); + t_im = _mm256_add_pd(a_im, b_im); + u_re = _mm256_sub_pd(a_re, b_re); + u_im = _mm256_sub_pd(a_im, b_im); + + tu1_re = _mm256_unpacklo_pd(t_re, u_re); + tu2_re = _mm256_unpackhi_pd(t_re, u_re); + tu1_im = _mm256_unpacklo_pd(t_im, u_im); + tu2_im = _mm256_unpackhi_pd(t_im, u_im); + _mm256_storeu_pd(&f[(u << 1)].v, + _mm256_permute2f128_pd(tu1_re, tu2_re, 0x20)); + _mm256_storeu_pd(&f[(u << 1) + 4].v, + _mm256_permute2f128_pd(tu1_re, tu2_re, 0x31)); + _mm256_storeu_pd(&f[(u << 1) + hn].v, + _mm256_permute2f128_pd(tu1_im, tu2_im, 0x20)); + _mm256_storeu_pd(&f[(u << 1) + 4 + hn].v, + _mm256_permute2f128_pd(tu1_im, tu2_im, 0x31)); + } + } else { + f[0] = f0[0]; + f[hn] = f1[0]; + + for (u = 0; u < qn; u ++) { + fpr a_re, a_im, b_re, b_im; + fpr t_re, t_im; + + a_re = f0[u]; + a_im = f0[u + qn]; + FPC_MUL(b_re, b_im, f1[u], f1[u + qn], + fpr_gm_tab[((u + hn) << 1) + 0], + fpr_gm_tab[((u + hn) << 1) + 1]); + FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); + f[(u << 1) + 0] = t_re; + f[(u << 1) + 0 + hn] = t_im; + FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); + f[(u << 1) + 1] = t_re; + f[(u << 1) + 1 + hn] = t_im; + } + } +} diff --git a/crypto_sign/falcon/falcon-1024/avx2/fpr.c b/crypto_sign/falcon/falcon-1024/avx2/fpr.c new file mode 100644 index 00000000..2f04a35d --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/fpr.c @@ -0,0 +1,1078 @@ +#include "inner.h" + 
+/* + * Floating-point operations. + * + * This file implements the non-inline functions declared in + * fpr.h, as well as the constants for FFT / iFFT. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + + +const fpr fpr_gm_tab[] = { + {0}, {0}, /* unused */ + {-0.000000000000000000000000000}, { 1.000000000000000000000000000}, + { 0.707106781186547524400844362}, { 0.707106781186547524400844362}, + {-0.707106781186547524400844362}, { 0.707106781186547524400844362}, + { 0.923879532511286756128183189}, { 0.382683432365089771728459984}, + {-0.382683432365089771728459984}, { 0.923879532511286756128183189}, + { 0.382683432365089771728459984}, { 0.923879532511286756128183189}, + {-0.923879532511286756128183189}, { 0.382683432365089771728459984}, + { 0.980785280403230449126182236}, { 0.195090322016128267848284868}, + {-0.195090322016128267848284868}, { 0.980785280403230449126182236}, + { 0.555570233019602224742830814}, { 0.831469612302545237078788378}, + {-0.831469612302545237078788378}, { 0.555570233019602224742830814}, + { 0.831469612302545237078788378}, { 0.555570233019602224742830814}, + {-0.555570233019602224742830814}, { 0.831469612302545237078788378}, + { 0.195090322016128267848284868}, { 0.980785280403230449126182236}, + {-0.980785280403230449126182236}, { 0.195090322016128267848284868}, + { 0.995184726672196886244836953}, { 0.098017140329560601994195564}, + {-0.098017140329560601994195564}, { 0.995184726672196886244836953}, + { 0.634393284163645498215171613}, { 0.773010453362736960810906610}, + {-0.773010453362736960810906610}, { 0.634393284163645498215171613}, + { 0.881921264348355029712756864}, { 0.471396736825997648556387626}, + {-0.471396736825997648556387626}, { 0.881921264348355029712756864}, + { 0.290284677254462367636192376}, { 0.956940335732208864935797887}, + {-0.956940335732208864935797887}, { 0.290284677254462367636192376}, + { 0.956940335732208864935797887}, { 0.290284677254462367636192376}, + {-0.290284677254462367636192376}, { 0.956940335732208864935797887}, + { 0.471396736825997648556387626}, { 0.881921264348355029712756864}, + 
{-0.881921264348355029712756864}, { 0.471396736825997648556387626}, + { 0.773010453362736960810906610}, { 0.634393284163645498215171613}, + {-0.634393284163645498215171613}, { 0.773010453362736960810906610}, + { 0.098017140329560601994195564}, { 0.995184726672196886244836953}, + {-0.995184726672196886244836953}, { 0.098017140329560601994195564}, + { 0.998795456205172392714771605}, { 0.049067674327418014254954977}, + {-0.049067674327418014254954977}, { 0.998795456205172392714771605}, + { 0.671558954847018400625376850}, { 0.740951125354959091175616897}, + {-0.740951125354959091175616897}, { 0.671558954847018400625376850}, + { 0.903989293123443331586200297}, { 0.427555093430282094320966857}, + {-0.427555093430282094320966857}, { 0.903989293123443331586200297}, + { 0.336889853392220050689253213}, { 0.941544065183020778412509403}, + {-0.941544065183020778412509403}, { 0.336889853392220050689253213}, + { 0.970031253194543992603984207}, { 0.242980179903263889948274162}, + {-0.242980179903263889948274162}, { 0.970031253194543992603984207}, + { 0.514102744193221726593693839}, { 0.857728610000272069902269984}, + {-0.857728610000272069902269984}, { 0.514102744193221726593693839}, + { 0.803207531480644909806676513}, { 0.595699304492433343467036529}, + {-0.595699304492433343467036529}, { 0.803207531480644909806676513}, + { 0.146730474455361751658850130}, { 0.989176509964780973451673738}, + {-0.989176509964780973451673738}, { 0.146730474455361751658850130}, + { 0.989176509964780973451673738}, { 0.146730474455361751658850130}, + {-0.146730474455361751658850130}, { 0.989176509964780973451673738}, + { 0.595699304492433343467036529}, { 0.803207531480644909806676513}, + {-0.803207531480644909806676513}, { 0.595699304492433343467036529}, + { 0.857728610000272069902269984}, { 0.514102744193221726593693839}, + {-0.514102744193221726593693839}, { 0.857728610000272069902269984}, + { 0.242980179903263889948274162}, { 0.970031253194543992603984207}, + {-0.970031253194543992603984207}, { 
0.242980179903263889948274162}, + { 0.941544065183020778412509403}, { 0.336889853392220050689253213}, + {-0.336889853392220050689253213}, { 0.941544065183020778412509403}, + { 0.427555093430282094320966857}, { 0.903989293123443331586200297}, + {-0.903989293123443331586200297}, { 0.427555093430282094320966857}, + { 0.740951125354959091175616897}, { 0.671558954847018400625376850}, + {-0.671558954847018400625376850}, { 0.740951125354959091175616897}, + { 0.049067674327418014254954977}, { 0.998795456205172392714771605}, + {-0.998795456205172392714771605}, { 0.049067674327418014254954977}, + { 0.999698818696204220115765650}, { 0.024541228522912288031734529}, + {-0.024541228522912288031734529}, { 0.999698818696204220115765650}, + { 0.689540544737066924616730630}, { 0.724247082951466920941069243}, + {-0.724247082951466920941069243}, { 0.689540544737066924616730630}, + { 0.914209755703530654635014829}, { 0.405241314004989870908481306}, + {-0.405241314004989870908481306}, { 0.914209755703530654635014829}, + { 0.359895036534988148775104572}, { 0.932992798834738887711660256}, + {-0.932992798834738887711660256}, { 0.359895036534988148775104572}, + { 0.975702130038528544460395766}, { 0.219101240156869797227737547}, + {-0.219101240156869797227737547}, { 0.975702130038528544460395766}, + { 0.534997619887097210663076905}, { 0.844853565249707073259571205}, + {-0.844853565249707073259571205}, { 0.534997619887097210663076905}, + { 0.817584813151583696504920884}, { 0.575808191417845300745972454}, + {-0.575808191417845300745972454}, { 0.817584813151583696504920884}, + { 0.170961888760301226363642357}, { 0.985277642388941244774018433}, + {-0.985277642388941244774018433}, { 0.170961888760301226363642357}, + { 0.992479534598709998156767252}, { 0.122410675199216198498704474}, + {-0.122410675199216198498704474}, { 0.992479534598709998156767252}, + { 0.615231590580626845484913563}, { 0.788346427626606262009164705}, + {-0.788346427626606262009164705}, { 0.615231590580626845484913563}, + { 
0.870086991108711418652292404}, { 0.492898192229784036873026689}, + {-0.492898192229784036873026689}, { 0.870086991108711418652292404}, + { 0.266712757474898386325286515}, { 0.963776065795439866686464356}, + {-0.963776065795439866686464356}, { 0.266712757474898386325286515}, + { 0.949528180593036667195936074}, { 0.313681740398891476656478846}, + {-0.313681740398891476656478846}, { 0.949528180593036667195936074}, + { 0.449611329654606600046294579}, { 0.893224301195515320342416447}, + {-0.893224301195515320342416447}, { 0.449611329654606600046294579}, + { 0.757208846506484547575464054}, { 0.653172842953776764084203014}, + {-0.653172842953776764084203014}, { 0.757208846506484547575464054}, + { 0.073564563599667423529465622}, { 0.997290456678690216135597140}, + {-0.997290456678690216135597140}, { 0.073564563599667423529465622}, + { 0.997290456678690216135597140}, { 0.073564563599667423529465622}, + {-0.073564563599667423529465622}, { 0.997290456678690216135597140}, + { 0.653172842953776764084203014}, { 0.757208846506484547575464054}, + {-0.757208846506484547575464054}, { 0.653172842953776764084203014}, + { 0.893224301195515320342416447}, { 0.449611329654606600046294579}, + {-0.449611329654606600046294579}, { 0.893224301195515320342416447}, + { 0.313681740398891476656478846}, { 0.949528180593036667195936074}, + {-0.949528180593036667195936074}, { 0.313681740398891476656478846}, + { 0.963776065795439866686464356}, { 0.266712757474898386325286515}, + {-0.266712757474898386325286515}, { 0.963776065795439866686464356}, + { 0.492898192229784036873026689}, { 0.870086991108711418652292404}, + {-0.870086991108711418652292404}, { 0.492898192229784036873026689}, + { 0.788346427626606262009164705}, { 0.615231590580626845484913563}, + {-0.615231590580626845484913563}, { 0.788346427626606262009164705}, + { 0.122410675199216198498704474}, { 0.992479534598709998156767252}, + {-0.992479534598709998156767252}, { 0.122410675199216198498704474}, + { 0.985277642388941244774018433}, { 
0.170961888760301226363642357}, + {-0.170961888760301226363642357}, { 0.985277642388941244774018433}, + { 0.575808191417845300745972454}, { 0.817584813151583696504920884}, + {-0.817584813151583696504920884}, { 0.575808191417845300745972454}, + { 0.844853565249707073259571205}, { 0.534997619887097210663076905}, + {-0.534997619887097210663076905}, { 0.844853565249707073259571205}, + { 0.219101240156869797227737547}, { 0.975702130038528544460395766}, + {-0.975702130038528544460395766}, { 0.219101240156869797227737547}, + { 0.932992798834738887711660256}, { 0.359895036534988148775104572}, + {-0.359895036534988148775104572}, { 0.932992798834738887711660256}, + { 0.405241314004989870908481306}, { 0.914209755703530654635014829}, + {-0.914209755703530654635014829}, { 0.405241314004989870908481306}, + { 0.724247082951466920941069243}, { 0.689540544737066924616730630}, + {-0.689540544737066924616730630}, { 0.724247082951466920941069243}, + { 0.024541228522912288031734529}, { 0.999698818696204220115765650}, + {-0.999698818696204220115765650}, { 0.024541228522912288031734529}, + { 0.999924701839144540921646491}, { 0.012271538285719926079408262}, + {-0.012271538285719926079408262}, { 0.999924701839144540921646491}, + { 0.698376249408972853554813503}, { 0.715730825283818654125532623}, + {-0.715730825283818654125532623}, { 0.698376249408972853554813503}, + { 0.919113851690057743908477789}, { 0.393992040061048108596188661}, + {-0.393992040061048108596188661}, { 0.919113851690057743908477789}, + { 0.371317193951837543411934967}, { 0.928506080473215565937167396}, + {-0.928506080473215565937167396}, { 0.371317193951837543411934967}, + { 0.978317370719627633106240097}, { 0.207111376192218549708116020}, + {-0.207111376192218549708116020}, { 0.978317370719627633106240097}, + { 0.545324988422046422313987347}, { 0.838224705554838043186996856}, + {-0.838224705554838043186996856}, { 0.545324988422046422313987347}, + { 0.824589302785025264474803737}, { 0.565731810783613197389765011}, + 
{-0.565731810783613197389765011}, { 0.824589302785025264474803737}, + { 0.183039887955140958516532578}, { 0.983105487431216327180301155}, + {-0.983105487431216327180301155}, { 0.183039887955140958516532578}, + { 0.993906970002356041546922813}, { 0.110222207293883058807899140}, + {-0.110222207293883058807899140}, { 0.993906970002356041546922813}, + { 0.624859488142386377084072816}, { 0.780737228572094478301588484}, + {-0.780737228572094478301588484}, { 0.624859488142386377084072816}, + { 0.876070094195406607095844268}, { 0.482183772079122748517344481}, + {-0.482183772079122748517344481}, { 0.876070094195406607095844268}, + { 0.278519689385053105207848526}, { 0.960430519415565811199035138}, + {-0.960430519415565811199035138}, { 0.278519689385053105207848526}, + { 0.953306040354193836916740383}, { 0.302005949319228067003463232}, + {-0.302005949319228067003463232}, { 0.953306040354193836916740383}, + { 0.460538710958240023633181487}, { 0.887639620402853947760181617}, + {-0.887639620402853947760181617}, { 0.460538710958240023633181487}, + { 0.765167265622458925888815999}, { 0.643831542889791465068086063}, + {-0.643831542889791465068086063}, { 0.765167265622458925888815999}, + { 0.085797312344439890461556332}, { 0.996312612182778012627226190}, + {-0.996312612182778012627226190}, { 0.085797312344439890461556332}, + { 0.998118112900149207125155861}, { 0.061320736302208577782614593}, + {-0.061320736302208577782614593}, { 0.998118112900149207125155861}, + { 0.662415777590171761113069817}, { 0.749136394523459325469203257}, + {-0.749136394523459325469203257}, { 0.662415777590171761113069817}, + { 0.898674465693953843041976744}, { 0.438616238538527637647025738}, + {-0.438616238538527637647025738}, { 0.898674465693953843041976744}, + { 0.325310292162262934135954708}, { 0.945607325380521325730945387}, + {-0.945607325380521325730945387}, { 0.325310292162262934135954708}, + { 0.966976471044852109087220226}, { 0.254865659604514571553980779}, + {-0.254865659604514571553980779}, { 
0.966976471044852109087220226}, + { 0.503538383725717558691867071}, { 0.863972856121586737918147054}, + {-0.863972856121586737918147054}, { 0.503538383725717558691867071}, + { 0.795836904608883536262791915}, { 0.605511041404325513920626941}, + {-0.605511041404325513920626941}, { 0.795836904608883536262791915}, + { 0.134580708507126186316358409}, { 0.990902635427780025108237011}, + {-0.990902635427780025108237011}, { 0.134580708507126186316358409}, + { 0.987301418157858382399815802}, { 0.158858143333861441684385360}, + {-0.158858143333861441684385360}, { 0.987301418157858382399815802}, + { 0.585797857456438860328080838}, { 0.810457198252594791726703434}, + {-0.810457198252594791726703434}, { 0.585797857456438860328080838}, + { 0.851355193105265142261290312}, { 0.524589682678468906215098464}, + {-0.524589682678468906215098464}, { 0.851355193105265142261290312}, + { 0.231058108280671119643236018}, { 0.972939952205560145467720114}, + {-0.972939952205560145467720114}, { 0.231058108280671119643236018}, + { 0.937339011912574923201899593}, { 0.348418680249434568419308588}, + {-0.348418680249434568419308588}, { 0.937339011912574923201899593}, + { 0.416429560097637182562598911}, { 0.909167983090522376563884788}, + {-0.909167983090522376563884788}, { 0.416429560097637182562598911}, + { 0.732654271672412834615546649}, { 0.680600997795453050594430464}, + {-0.680600997795453050594430464}, { 0.732654271672412834615546649}, + { 0.036807222941358832324332691}, { 0.999322384588349500896221011}, + {-0.999322384588349500896221011}, { 0.036807222941358832324332691}, + { 0.999322384588349500896221011}, { 0.036807222941358832324332691}, + {-0.036807222941358832324332691}, { 0.999322384588349500896221011}, + { 0.680600997795453050594430464}, { 0.732654271672412834615546649}, + {-0.732654271672412834615546649}, { 0.680600997795453050594430464}, + { 0.909167983090522376563884788}, { 0.416429560097637182562598911}, + {-0.416429560097637182562598911}, { 0.909167983090522376563884788}, + { 
0.348418680249434568419308588}, { 0.937339011912574923201899593}, + {-0.937339011912574923201899593}, { 0.348418680249434568419308588}, + { 0.972939952205560145467720114}, { 0.231058108280671119643236018}, + {-0.231058108280671119643236018}, { 0.972939952205560145467720114}, + { 0.524589682678468906215098464}, { 0.851355193105265142261290312}, + {-0.851355193105265142261290312}, { 0.524589682678468906215098464}, + { 0.810457198252594791726703434}, { 0.585797857456438860328080838}, + {-0.585797857456438860328080838}, { 0.810457198252594791726703434}, + { 0.158858143333861441684385360}, { 0.987301418157858382399815802}, + {-0.987301418157858382399815802}, { 0.158858143333861441684385360}, + { 0.990902635427780025108237011}, { 0.134580708507126186316358409}, + {-0.134580708507126186316358409}, { 0.990902635427780025108237011}, + { 0.605511041404325513920626941}, { 0.795836904608883536262791915}, + {-0.795836904608883536262791915}, { 0.605511041404325513920626941}, + { 0.863972856121586737918147054}, { 0.503538383725717558691867071}, + {-0.503538383725717558691867071}, { 0.863972856121586737918147054}, + { 0.254865659604514571553980779}, { 0.966976471044852109087220226}, + {-0.966976471044852109087220226}, { 0.254865659604514571553980779}, + { 0.945607325380521325730945387}, { 0.325310292162262934135954708}, + {-0.325310292162262934135954708}, { 0.945607325380521325730945387}, + { 0.438616238538527637647025738}, { 0.898674465693953843041976744}, + {-0.898674465693953843041976744}, { 0.438616238538527637647025738}, + { 0.749136394523459325469203257}, { 0.662415777590171761113069817}, + {-0.662415777590171761113069817}, { 0.749136394523459325469203257}, + { 0.061320736302208577782614593}, { 0.998118112900149207125155861}, + {-0.998118112900149207125155861}, { 0.061320736302208577782614593}, + { 0.996312612182778012627226190}, { 0.085797312344439890461556332}, + {-0.085797312344439890461556332}, { 0.996312612182778012627226190}, + { 0.643831542889791465068086063}, { 
0.765167265622458925888815999}, + {-0.765167265622458925888815999}, { 0.643831542889791465068086063}, + { 0.887639620402853947760181617}, { 0.460538710958240023633181487}, + {-0.460538710958240023633181487}, { 0.887639620402853947760181617}, + { 0.302005949319228067003463232}, { 0.953306040354193836916740383}, + {-0.953306040354193836916740383}, { 0.302005949319228067003463232}, + { 0.960430519415565811199035138}, { 0.278519689385053105207848526}, + {-0.278519689385053105207848526}, { 0.960430519415565811199035138}, + { 0.482183772079122748517344481}, { 0.876070094195406607095844268}, + {-0.876070094195406607095844268}, { 0.482183772079122748517344481}, + { 0.780737228572094478301588484}, { 0.624859488142386377084072816}, + {-0.624859488142386377084072816}, { 0.780737228572094478301588484}, + { 0.110222207293883058807899140}, { 0.993906970002356041546922813}, + {-0.993906970002356041546922813}, { 0.110222207293883058807899140}, + { 0.983105487431216327180301155}, { 0.183039887955140958516532578}, + {-0.183039887955140958516532578}, { 0.983105487431216327180301155}, + { 0.565731810783613197389765011}, { 0.824589302785025264474803737}, + {-0.824589302785025264474803737}, { 0.565731810783613197389765011}, + { 0.838224705554838043186996856}, { 0.545324988422046422313987347}, + {-0.545324988422046422313987347}, { 0.838224705554838043186996856}, + { 0.207111376192218549708116020}, { 0.978317370719627633106240097}, + {-0.978317370719627633106240097}, { 0.207111376192218549708116020}, + { 0.928506080473215565937167396}, { 0.371317193951837543411934967}, + {-0.371317193951837543411934967}, { 0.928506080473215565937167396}, + { 0.393992040061048108596188661}, { 0.919113851690057743908477789}, + {-0.919113851690057743908477789}, { 0.393992040061048108596188661}, + { 0.715730825283818654125532623}, { 0.698376249408972853554813503}, + {-0.698376249408972853554813503}, { 0.715730825283818654125532623}, + { 0.012271538285719926079408262}, { 0.999924701839144540921646491}, + 
{-0.999924701839144540921646491}, { 0.012271538285719926079408262}, + { 0.999981175282601142656990438}, { 0.006135884649154475359640235}, + {-0.006135884649154475359640235}, { 0.999981175282601142656990438}, + { 0.702754744457225302452914421}, { 0.711432195745216441522130290}, + {-0.711432195745216441522130290}, { 0.702754744457225302452914421}, + { 0.921514039342041943465396332}, { 0.388345046698826291624993541}, + {-0.388345046698826291624993541}, { 0.921514039342041943465396332}, + { 0.377007410216418256726567823}, { 0.926210242138311341974793388}, + {-0.926210242138311341974793388}, { 0.377007410216418256726567823}, + { 0.979569765685440534439326110}, { 0.201104634842091911558443546}, + {-0.201104634842091911558443546}, { 0.979569765685440534439326110}, + { 0.550457972936604802977289893}, { 0.834862874986380056304401383}, + {-0.834862874986380056304401383}, { 0.550457972936604802977289893}, + { 0.828045045257755752067527592}, { 0.560661576197336023839710223}, + {-0.560661576197336023839710223}, { 0.828045045257755752067527592}, + { 0.189068664149806212754997837}, { 0.981963869109555264072848154}, + {-0.981963869109555264072848154}, { 0.189068664149806212754997837}, + { 0.994564570734255452119106243}, { 0.104121633872054579120943880}, + {-0.104121633872054579120943880}, { 0.994564570734255452119106243}, + { 0.629638238914927025372981341}, { 0.776888465673232450040827983}, + {-0.776888465673232450040827983}, { 0.629638238914927025372981341}, + { 0.879012226428633477831323711}, { 0.476799230063322133342158117}, + {-0.476799230063322133342158117}, { 0.879012226428633477831323711}, + { 0.284407537211271843618310615}, { 0.958703474895871555374645792}, + {-0.958703474895871555374645792}, { 0.284407537211271843618310615}, + { 0.955141168305770721498157712}, { 0.296150888243623824121786128}, + {-0.296150888243623824121786128}, { 0.955141168305770721498157712}, + { 0.465976495767966177902756065}, { 0.884797098430937780104007041}, + {-0.884797098430937780104007041}, { 
0.465976495767966177902756065}, + { 0.769103337645579639346626069}, { 0.639124444863775743801488193}, + {-0.639124444863775743801488193}, { 0.769103337645579639346626069}, + { 0.091908956497132728624990979}, { 0.995767414467659793982495643}, + {-0.995767414467659793982495643}, { 0.091908956497132728624990979}, + { 0.998475580573294752208559038}, { 0.055195244349689939809447526}, + {-0.055195244349689939809447526}, { 0.998475580573294752208559038}, + { 0.666999922303637506650154222}, { 0.745057785441465962407907310}, + {-0.745057785441465962407907310}, { 0.666999922303637506650154222}, + { 0.901348847046022014570746093}, { 0.433093818853151968484222638}, + {-0.433093818853151968484222638}, { 0.901348847046022014570746093}, + { 0.331106305759876401737190737}, { 0.943593458161960361495301445}, + {-0.943593458161960361495301445}, { 0.331106305759876401737190737}, + { 0.968522094274417316221088329}, { 0.248927605745720168110682816}, + {-0.248927605745720168110682816}, { 0.968522094274417316221088329}, + { 0.508830142543107036931749324}, { 0.860866938637767279344583877}, + {-0.860866938637767279344583877}, { 0.508830142543107036931749324}, + { 0.799537269107905033500246232}, { 0.600616479383868926653875896}, + {-0.600616479383868926653875896}, { 0.799537269107905033500246232}, + { 0.140658239332849230714788846}, { 0.990058210262297105505906464}, + {-0.990058210262297105505906464}, { 0.140658239332849230714788846}, + { 0.988257567730749491404792538}, { 0.152797185258443427720336613}, + {-0.152797185258443427720336613}, { 0.988257567730749491404792538}, + { 0.590759701858874228423887908}, { 0.806847553543799272206514313}, + {-0.806847553543799272206514313}, { 0.590759701858874228423887908}, + { 0.854557988365400520767862276}, { 0.519355990165589587361829932}, + {-0.519355990165589587361829932}, { 0.854557988365400520767862276}, + { 0.237023605994367206867735915}, { 0.971503890986251775537099622}, + {-0.971503890986251775537099622}, { 0.237023605994367206867735915}, + { 
0.939459223602189911962669246}, { 0.342660717311994397592781983}, + {-0.342660717311994397592781983}, { 0.939459223602189911962669246}, + { 0.422000270799799685941287941}, { 0.906595704514915365332960588}, + {-0.906595704514915365332960588}, { 0.422000270799799685941287941}, + { 0.736816568877369875090132520}, { 0.676092703575315960360419228}, + {-0.676092703575315960360419228}, { 0.736816568877369875090132520}, + { 0.042938256934940823077124540}, { 0.999077727752645382888781997}, + {-0.999077727752645382888781997}, { 0.042938256934940823077124540}, + { 0.999529417501093163079703322}, { 0.030674803176636625934021028}, + {-0.030674803176636625934021028}, { 0.999529417501093163079703322}, + { 0.685083667772700381362052545}, { 0.728464390448225196492035438}, + {-0.728464390448225196492035438}, { 0.685083667772700381362052545}, + { 0.911706032005429851404397325}, { 0.410843171057903942183466675}, + {-0.410843171057903942183466675}, { 0.911706032005429851404397325}, + { 0.354163525420490382357395796}, { 0.935183509938947577642207480}, + {-0.935183509938947577642207480}, { 0.354163525420490382357395796}, + { 0.974339382785575860518721668}, { 0.225083911359792835991642120}, + {-0.225083911359792835991642120}, { 0.974339382785575860518721668}, + { 0.529803624686294668216054671}, { 0.848120344803297251279133563}, + {-0.848120344803297251279133563}, { 0.529803624686294668216054671}, + { 0.814036329705948361654516690}, { 0.580813958095764545075595272}, + {-0.580813958095764545075595272}, { 0.814036329705948361654516690}, + { 0.164913120489969921418189113}, { 0.986308097244598647863297524}, + {-0.986308097244598647863297524}, { 0.164913120489969921418189113}, + { 0.991709753669099522860049931}, { 0.128498110793793172624415589}, + {-0.128498110793793172624415589}, { 0.991709753669099522860049931}, + { 0.610382806276309452716352152}, { 0.792106577300212351782342879}, + {-0.792106577300212351782342879}, { 0.610382806276309452716352152}, + { 0.867046245515692651480195629}, { 
0.498227666972781852410983869}, + {-0.498227666972781852410983869}, { 0.867046245515692651480195629}, + { 0.260794117915275518280186509}, { 0.965394441697689374550843858}, + {-0.965394441697689374550843858}, { 0.260794117915275518280186509}, + { 0.947585591017741134653387321}, { 0.319502030816015677901518272}, + {-0.319502030816015677901518272}, { 0.947585591017741134653387321}, + { 0.444122144570429231642069418}, { 0.895966249756185155914560282}, + {-0.895966249756185155914560282}, { 0.444122144570429231642069418}, + { 0.753186799043612482483430486}, { 0.657806693297078656931182264}, + {-0.657806693297078656931182264}, { 0.753186799043612482483430486}, + { 0.067443919563664057897972422}, { 0.997723066644191609848546728}, + {-0.997723066644191609848546728}, { 0.067443919563664057897972422}, + { 0.996820299291165714972629398}, { 0.079682437971430121147120656}, + {-0.079682437971430121147120656}, { 0.996820299291165714972629398}, + { 0.648514401022112445084560551}, { 0.761202385484261814029709836}, + {-0.761202385484261814029709836}, { 0.648514401022112445084560551}, + { 0.890448723244757889952150560}, { 0.455083587126343823535869268}, + {-0.455083587126343823535869268}, { 0.890448723244757889952150560}, + { 0.307849640041534893682063646}, { 0.951435020969008369549175569}, + {-0.951435020969008369549175569}, { 0.307849640041534893682063646}, + { 0.962121404269041595429604316}, { 0.272621355449948984493347477}, + {-0.272621355449948984493347477}, { 0.962121404269041595429604316}, + { 0.487550160148435954641485027}, { 0.873094978418290098636085973}, + {-0.873094978418290098636085973}, { 0.487550160148435954641485027}, + { 0.784556597155575233023892575}, { 0.620057211763289178646268191}, + {-0.620057211763289178646268191}, { 0.784556597155575233023892575}, + { 0.116318630911904767252544319}, { 0.993211949234794533104601012}, + {-0.993211949234794533104601012}, { 0.116318630911904767252544319}, + { 0.984210092386929073193874387}, { 0.177004220412148756196839844}, + 
{-0.177004220412148756196839844}, { 0.984210092386929073193874387}, + { 0.570780745886967280232652864}, { 0.821102514991104679060430820}, + {-0.821102514991104679060430820}, { 0.570780745886967280232652864}, + { 0.841554977436898409603499520}, { 0.540171472729892881297845480}, + {-0.540171472729892881297845480}, { 0.841554977436898409603499520}, + { 0.213110319916091373967757518}, { 0.977028142657754351485866211}, + {-0.977028142657754351485866211}, { 0.213110319916091373967757518}, + { 0.930766961078983731944872340}, { 0.365612997804773870011745909}, + {-0.365612997804773870011745909}, { 0.930766961078983731944872340}, + { 0.399624199845646828544117031}, { 0.916679059921042663116457013}, + {-0.916679059921042663116457013}, { 0.399624199845646828544117031}, + { 0.720002507961381629076682999}, { 0.693971460889654009003734389}, + {-0.693971460889654009003734389}, { 0.720002507961381629076682999}, + { 0.018406729905804820927366313}, { 0.999830581795823422015722275}, + {-0.999830581795823422015722275}, { 0.018406729905804820927366313}, + { 0.999830581795823422015722275}, { 0.018406729905804820927366313}, + {-0.018406729905804820927366313}, { 0.999830581795823422015722275}, + { 0.693971460889654009003734389}, { 0.720002507961381629076682999}, + {-0.720002507961381629076682999}, { 0.693971460889654009003734389}, + { 0.916679059921042663116457013}, { 0.399624199845646828544117031}, + {-0.399624199845646828544117031}, { 0.916679059921042663116457013}, + { 0.365612997804773870011745909}, { 0.930766961078983731944872340}, + {-0.930766961078983731944872340}, { 0.365612997804773870011745909}, + { 0.977028142657754351485866211}, { 0.213110319916091373967757518}, + {-0.213110319916091373967757518}, { 0.977028142657754351485866211}, + { 0.540171472729892881297845480}, { 0.841554977436898409603499520}, + {-0.841554977436898409603499520}, { 0.540171472729892881297845480}, + { 0.821102514991104679060430820}, { 0.570780745886967280232652864}, + {-0.570780745886967280232652864}, { 
0.821102514991104679060430820}, + { 0.177004220412148756196839844}, { 0.984210092386929073193874387}, + {-0.984210092386929073193874387}, { 0.177004220412148756196839844}, + { 0.993211949234794533104601012}, { 0.116318630911904767252544319}, + {-0.116318630911904767252544319}, { 0.993211949234794533104601012}, + { 0.620057211763289178646268191}, { 0.784556597155575233023892575}, + {-0.784556597155575233023892575}, { 0.620057211763289178646268191}, + { 0.873094978418290098636085973}, { 0.487550160148435954641485027}, + {-0.487550160148435954641485027}, { 0.873094978418290098636085973}, + { 0.272621355449948984493347477}, { 0.962121404269041595429604316}, + {-0.962121404269041595429604316}, { 0.272621355449948984493347477}, + { 0.951435020969008369549175569}, { 0.307849640041534893682063646}, + {-0.307849640041534893682063646}, { 0.951435020969008369549175569}, + { 0.455083587126343823535869268}, { 0.890448723244757889952150560}, + {-0.890448723244757889952150560}, { 0.455083587126343823535869268}, + { 0.761202385484261814029709836}, { 0.648514401022112445084560551}, + {-0.648514401022112445084560551}, { 0.761202385484261814029709836}, + { 0.079682437971430121147120656}, { 0.996820299291165714972629398}, + {-0.996820299291165714972629398}, { 0.079682437971430121147120656}, + { 0.997723066644191609848546728}, { 0.067443919563664057897972422}, + {-0.067443919563664057897972422}, { 0.997723066644191609848546728}, + { 0.657806693297078656931182264}, { 0.753186799043612482483430486}, + {-0.753186799043612482483430486}, { 0.657806693297078656931182264}, + { 0.895966249756185155914560282}, { 0.444122144570429231642069418}, + {-0.444122144570429231642069418}, { 0.895966249756185155914560282}, + { 0.319502030816015677901518272}, { 0.947585591017741134653387321}, + {-0.947585591017741134653387321}, { 0.319502030816015677901518272}, + { 0.965394441697689374550843858}, { 0.260794117915275518280186509}, + {-0.260794117915275518280186509}, { 0.965394441697689374550843858}, + { 
0.498227666972781852410983869}, { 0.867046245515692651480195629}, + {-0.867046245515692651480195629}, { 0.498227666972781852410983869}, + { 0.792106577300212351782342879}, { 0.610382806276309452716352152}, + {-0.610382806276309452716352152}, { 0.792106577300212351782342879}, + { 0.128498110793793172624415589}, { 0.991709753669099522860049931}, + {-0.991709753669099522860049931}, { 0.128498110793793172624415589}, + { 0.986308097244598647863297524}, { 0.164913120489969921418189113}, + {-0.164913120489969921418189113}, { 0.986308097244598647863297524}, + { 0.580813958095764545075595272}, { 0.814036329705948361654516690}, + {-0.814036329705948361654516690}, { 0.580813958095764545075595272}, + { 0.848120344803297251279133563}, { 0.529803624686294668216054671}, + {-0.529803624686294668216054671}, { 0.848120344803297251279133563}, + { 0.225083911359792835991642120}, { 0.974339382785575860518721668}, + {-0.974339382785575860518721668}, { 0.225083911359792835991642120}, + { 0.935183509938947577642207480}, { 0.354163525420490382357395796}, + {-0.354163525420490382357395796}, { 0.935183509938947577642207480}, + { 0.410843171057903942183466675}, { 0.911706032005429851404397325}, + {-0.911706032005429851404397325}, { 0.410843171057903942183466675}, + { 0.728464390448225196492035438}, { 0.685083667772700381362052545}, + {-0.685083667772700381362052545}, { 0.728464390448225196492035438}, + { 0.030674803176636625934021028}, { 0.999529417501093163079703322}, + {-0.999529417501093163079703322}, { 0.030674803176636625934021028}, + { 0.999077727752645382888781997}, { 0.042938256934940823077124540}, + {-0.042938256934940823077124540}, { 0.999077727752645382888781997}, + { 0.676092703575315960360419228}, { 0.736816568877369875090132520}, + {-0.736816568877369875090132520}, { 0.676092703575315960360419228}, + { 0.906595704514915365332960588}, { 0.422000270799799685941287941}, + {-0.422000270799799685941287941}, { 0.906595704514915365332960588}, + { 0.342660717311994397592781983}, { 
0.939459223602189911962669246}, + {-0.939459223602189911962669246}, { 0.342660717311994397592781983}, + { 0.971503890986251775537099622}, { 0.237023605994367206867735915}, + {-0.237023605994367206867735915}, { 0.971503890986251775537099622}, + { 0.519355990165589587361829932}, { 0.854557988365400520767862276}, + {-0.854557988365400520767862276}, { 0.519355990165589587361829932}, + { 0.806847553543799272206514313}, { 0.590759701858874228423887908}, + {-0.590759701858874228423887908}, { 0.806847553543799272206514313}, + { 0.152797185258443427720336613}, { 0.988257567730749491404792538}, + {-0.988257567730749491404792538}, { 0.152797185258443427720336613}, + { 0.990058210262297105505906464}, { 0.140658239332849230714788846}, + {-0.140658239332849230714788846}, { 0.990058210262297105505906464}, + { 0.600616479383868926653875896}, { 0.799537269107905033500246232}, + {-0.799537269107905033500246232}, { 0.600616479383868926653875896}, + { 0.860866938637767279344583877}, { 0.508830142543107036931749324}, + {-0.508830142543107036931749324}, { 0.860866938637767279344583877}, + { 0.248927605745720168110682816}, { 0.968522094274417316221088329}, + {-0.968522094274417316221088329}, { 0.248927605745720168110682816}, + { 0.943593458161960361495301445}, { 0.331106305759876401737190737}, + {-0.331106305759876401737190737}, { 0.943593458161960361495301445}, + { 0.433093818853151968484222638}, { 0.901348847046022014570746093}, + {-0.901348847046022014570746093}, { 0.433093818853151968484222638}, + { 0.745057785441465962407907310}, { 0.666999922303637506650154222}, + {-0.666999922303637506650154222}, { 0.745057785441465962407907310}, + { 0.055195244349689939809447526}, { 0.998475580573294752208559038}, + {-0.998475580573294752208559038}, { 0.055195244349689939809447526}, + { 0.995767414467659793982495643}, { 0.091908956497132728624990979}, + {-0.091908956497132728624990979}, { 0.995767414467659793982495643}, + { 0.639124444863775743801488193}, { 0.769103337645579639346626069}, + 
{-0.769103337645579639346626069}, { 0.639124444863775743801488193}, + { 0.884797098430937780104007041}, { 0.465976495767966177902756065}, + {-0.465976495767966177902756065}, { 0.884797098430937780104007041}, + { 0.296150888243623824121786128}, { 0.955141168305770721498157712}, + {-0.955141168305770721498157712}, { 0.296150888243623824121786128}, + { 0.958703474895871555374645792}, { 0.284407537211271843618310615}, + {-0.284407537211271843618310615}, { 0.958703474895871555374645792}, + { 0.476799230063322133342158117}, { 0.879012226428633477831323711}, + {-0.879012226428633477831323711}, { 0.476799230063322133342158117}, + { 0.776888465673232450040827983}, { 0.629638238914927025372981341}, + {-0.629638238914927025372981341}, { 0.776888465673232450040827983}, + { 0.104121633872054579120943880}, { 0.994564570734255452119106243}, + {-0.994564570734255452119106243}, { 0.104121633872054579120943880}, + { 0.981963869109555264072848154}, { 0.189068664149806212754997837}, + {-0.189068664149806212754997837}, { 0.981963869109555264072848154}, + { 0.560661576197336023839710223}, { 0.828045045257755752067527592}, + {-0.828045045257755752067527592}, { 0.560661576197336023839710223}, + { 0.834862874986380056304401383}, { 0.550457972936604802977289893}, + {-0.550457972936604802977289893}, { 0.834862874986380056304401383}, + { 0.201104634842091911558443546}, { 0.979569765685440534439326110}, + {-0.979569765685440534439326110}, { 0.201104634842091911558443546}, + { 0.926210242138311341974793388}, { 0.377007410216418256726567823}, + {-0.377007410216418256726567823}, { 0.926210242138311341974793388}, + { 0.388345046698826291624993541}, { 0.921514039342041943465396332}, + {-0.921514039342041943465396332}, { 0.388345046698826291624993541}, + { 0.711432195745216441522130290}, { 0.702754744457225302452914421}, + {-0.702754744457225302452914421}, { 0.711432195745216441522130290}, + { 0.006135884649154475359640235}, { 0.999981175282601142656990438}, + {-0.999981175282601142656990438}, { 
0.006135884649154475359640235}, + { 0.999995293809576171511580126}, { 0.003067956762965976270145365}, + {-0.003067956762965976270145365}, { 0.999995293809576171511580126}, + { 0.704934080375904908852523758}, { 0.709272826438865651316533772}, + {-0.709272826438865651316533772}, { 0.704934080375904908852523758}, + { 0.922701128333878570437264227}, { 0.385516053843918864075607949}, + {-0.385516053843918864075607949}, { 0.922701128333878570437264227}, + { 0.379847208924051170576281147}, { 0.925049240782677590302371869}, + {-0.925049240782677590302371869}, { 0.379847208924051170576281147}, + { 0.980182135968117392690210009}, { 0.198098410717953586179324918}, + {-0.198098410717953586179324918}, { 0.980182135968117392690210009}, + { 0.553016705580027531764226988}, { 0.833170164701913186439915922}, + {-0.833170164701913186439915922}, { 0.553016705580027531764226988}, + { 0.829761233794523042469023765}, { 0.558118531220556115693702964}, + {-0.558118531220556115693702964}, { 0.829761233794523042469023765}, + { 0.192080397049892441679288205}, { 0.981379193313754574318224190}, + {-0.981379193313754574318224190}, { 0.192080397049892441679288205}, + { 0.994879330794805620591166107}, { 0.101069862754827824987887585}, + {-0.101069862754827824987887585}, { 0.994879330794805620591166107}, + { 0.632018735939809021909403706}, { 0.774953106594873878359129282}, + {-0.774953106594873878359129282}, { 0.632018735939809021909403706}, + { 0.880470889052160770806542929}, { 0.474100214650550014398580015}, + {-0.474100214650550014398580015}, { 0.880470889052160770806542929}, + { 0.287347459544729526477331841}, { 0.957826413027532890321037029}, + {-0.957826413027532890321037029}, { 0.287347459544729526477331841}, + { 0.956045251349996443270479823}, { 0.293219162694258650606608599}, + {-0.293219162694258650606608599}, { 0.956045251349996443270479823}, + { 0.468688822035827933697617870}, { 0.883363338665731594736308015}, + {-0.883363338665731594736308015}, { 0.468688822035827933697617870}, + { 
0.771060524261813773200605759}, { 0.636761861236284230413943435}, + {-0.636761861236284230413943435}, { 0.771060524261813773200605759}, + { 0.094963495329638998938034312}, { 0.995480755491926941769171600}, + {-0.995480755491926941769171600}, { 0.094963495329638998938034312}, + { 0.998640218180265222418199049}, { 0.052131704680283321236358216}, + {-0.052131704680283321236358216}, { 0.998640218180265222418199049}, + { 0.669282588346636065720696366}, { 0.743007952135121693517362293}, + {-0.743007952135121693517362293}, { 0.669282588346636065720696366}, + { 0.902673318237258806751502391}, { 0.430326481340082633908199031}, + {-0.430326481340082633908199031}, { 0.902673318237258806751502391}, + { 0.333999651442009404650865481}, { 0.942573197601446879280758735}, + {-0.942573197601446879280758735}, { 0.333999651442009404650865481}, + { 0.969281235356548486048290738}, { 0.245955050335794611599924709}, + {-0.245955050335794611599924709}, { 0.969281235356548486048290738}, + { 0.511468850437970399504391001}, { 0.859301818357008404783582139}, + {-0.859301818357008404783582139}, { 0.511468850437970399504391001}, + { 0.801376171723140219430247777}, { 0.598160706996342311724958652}, + {-0.598160706996342311724958652}, { 0.801376171723140219430247777}, + { 0.143695033150294454819773349}, { 0.989622017463200834623694454}, + {-0.989622017463200834623694454}, { 0.143695033150294454819773349}, + { 0.988721691960323767604516485}, { 0.149764534677321517229695737}, + {-0.149764534677321517229695737}, { 0.988721691960323767604516485}, + { 0.593232295039799808047809426}, { 0.805031331142963597922659282}, + {-0.805031331142963597922659282}, { 0.593232295039799808047809426}, + { 0.856147328375194481019630732}, { 0.516731799017649881508753876}, + {-0.516731799017649881508753876}, { 0.856147328375194481019630732}, + { 0.240003022448741486568922365}, { 0.970772140728950302138169611}, + {-0.970772140728950302138169611}, { 0.240003022448741486568922365}, + { 0.940506070593268323787291309}, { 
0.339776884406826857828825803}, + {-0.339776884406826857828825803}, { 0.940506070593268323787291309}, + { 0.424779681209108833357226189}, { 0.905296759318118774354048329}, + {-0.905296759318118774354048329}, { 0.424779681209108833357226189}, + { 0.738887324460615147933116508}, { 0.673829000378756060917568372}, + {-0.673829000378756060917568372}, { 0.738887324460615147933116508}, + { 0.046003182130914628814301788}, { 0.998941293186856850633930266}, + {-0.998941293186856850633930266}, { 0.046003182130914628814301788}, + { 0.999618822495178597116830637}, { 0.027608145778965741612354872}, + {-0.027608145778965741612354872}, { 0.999618822495178597116830637}, + { 0.687315340891759108199186948}, { 0.726359155084345976817494315}, + {-0.726359155084345976817494315}, { 0.687315340891759108199186948}, + { 0.912962190428398164628018233}, { 0.408044162864978680820747499}, + {-0.408044162864978680820747499}, { 0.912962190428398164628018233}, + { 0.357030961233430032614954036}, { 0.934092550404258914729877883}, + {-0.934092550404258914729877883}, { 0.357030961233430032614954036}, + { 0.975025345066994146844913468}, { 0.222093620973203534094094721}, + {-0.222093620973203534094094721}, { 0.975025345066994146844913468}, + { 0.532403127877197971442805218}, { 0.846490938774052078300544488}, + {-0.846490938774052078300544488}, { 0.532403127877197971442805218}, + { 0.815814410806733789010772660}, { 0.578313796411655563342245019}, + {-0.578313796411655563342245019}, { 0.815814410806733789010772660}, + { 0.167938294974731178054745536}, { 0.985797509167567424700995000}, + {-0.985797509167567424700995000}, { 0.167938294974731178054745536}, + { 0.992099313142191757112085445}, { 0.125454983411546238542336453}, + {-0.125454983411546238542336453}, { 0.992099313142191757112085445}, + { 0.612810082429409703935211936}, { 0.790230221437310055030217152}, + {-0.790230221437310055030217152}, { 0.612810082429409703935211936}, + { 0.868570705971340895340449876}, { 0.495565261825772531150266670}, + 
{-0.495565261825772531150266670}, { 0.868570705971340895340449876}, + { 0.263754678974831383611349322}, { 0.964589793289812723836432159}, + {-0.964589793289812723836432159}, { 0.263754678974831383611349322}, + { 0.948561349915730288158494826}, { 0.316593375556165867243047035}, + {-0.316593375556165867243047035}, { 0.948561349915730288158494826}, + { 0.446868840162374195353044389}, { 0.894599485631382678433072126}, + {-0.894599485631382678433072126}, { 0.446868840162374195353044389}, + { 0.755201376896536527598710756}, { 0.655492852999615385312679701}, + {-0.655492852999615385312679701}, { 0.755201376896536527598710756}, + { 0.070504573389613863027351471}, { 0.997511456140303459699448390}, + {-0.997511456140303459699448390}, { 0.070504573389613863027351471}, + { 0.997060070339482978987989949}, { 0.076623861392031492278332463}, + {-0.076623861392031492278332463}, { 0.997060070339482978987989949}, + { 0.650846684996380915068975573}, { 0.759209188978388033485525443}, + {-0.759209188978388033485525443}, { 0.650846684996380915068975573}, + { 0.891840709392342727796478697}, { 0.452349587233770874133026703}, + {-0.452349587233770874133026703}, { 0.891840709392342727796478697}, + { 0.310767152749611495835997250}, { 0.950486073949481721759926101}, + {-0.950486073949481721759926101}, { 0.310767152749611495835997250}, + { 0.962953266873683886347921481}, { 0.269668325572915106525464462}, + {-0.269668325572915106525464462}, { 0.962953266873683886347921481}, + { 0.490226483288291154229598449}, { 0.871595086655951034842481435}, + {-0.871595086655951034842481435}, { 0.490226483288291154229598449}, + { 0.786455213599085757522319464}, { 0.617647307937803932403979402}, + {-0.617647307937803932403979402}, { 0.786455213599085757522319464}, + { 0.119365214810991364593637790}, { 0.992850414459865090793563344}, + {-0.992850414459865090793563344}, { 0.119365214810991364593637790}, + { 0.984748501801904218556553176}, { 0.173983873387463827950700807}, + {-0.173983873387463827950700807}, { 
0.984748501801904218556553176}, + { 0.573297166698042212820171239}, { 0.819347520076796960824689637}, + {-0.819347520076796960824689637}, { 0.573297166698042212820171239}, + { 0.843208239641845437161743865}, { 0.537587076295645482502214932}, + {-0.537587076295645482502214932}, { 0.843208239641845437161743865}, + { 0.216106797076219509948385131}, { 0.976369731330021149312732194}, + {-0.976369731330021149312732194}, { 0.216106797076219509948385131}, + { 0.931884265581668106718557199}, { 0.362755724367397216204854462}, + {-0.362755724367397216204854462}, { 0.931884265581668106718557199}, + { 0.402434650859418441082533934}, { 0.915448716088267819566431292}, + {-0.915448716088267819566431292}, { 0.402434650859418441082533934}, + { 0.722128193929215321243607198}, { 0.691759258364157774906734132}, + {-0.691759258364157774906734132}, { 0.722128193929215321243607198}, + { 0.021474080275469507418374898}, { 0.999769405351215321657617036}, + {-0.999769405351215321657617036}, { 0.021474080275469507418374898}, + { 0.999882347454212525633049627}, { 0.015339206284988101044151868}, + {-0.015339206284988101044151868}, { 0.999882347454212525633049627}, + { 0.696177131491462944788582591}, { 0.717870045055731736211325329}, + {-0.717870045055731736211325329}, { 0.696177131491462944788582591}, + { 0.917900775621390457642276297}, { 0.396809987416710328595290911}, + {-0.396809987416710328595290911}, { 0.917900775621390457642276297}, + { 0.368466829953372331712746222}, { 0.929640895843181265457918066}, + {-0.929640895843181265457918066}, { 0.368466829953372331712746222}, + { 0.977677357824509979943404762}, { 0.210111836880469621717489972}, + {-0.210111836880469621717489972}, { 0.977677357824509979943404762}, + { 0.542750784864515906586768661}, { 0.839893794195999504583383987}, + {-0.839893794195999504583383987}, { 0.542750784864515906586768661}, + { 0.822849781375826332046780034}, { 0.568258952670131549790548489}, + {-0.568258952670131549790548489}, { 0.822849781375826332046780034}, + { 
0.180022901405699522679906590}, { 0.983662419211730274396237776}, + {-0.983662419211730274396237776}, { 0.180022901405699522679906590}, + { 0.993564135520595333782021697}, { 0.113270952177564349018228733}, + {-0.113270952177564349018228733}, { 0.993564135520595333782021697}, + { 0.622461279374149972519166721}, { 0.782650596166575738458949301}, + {-0.782650596166575738458949301}, { 0.622461279374149972519166721}, + { 0.874586652278176112634431897}, { 0.484869248000791101822951699}, + {-0.484869248000791101822951699}, { 0.874586652278176112634431897}, + { 0.275571819310958163076425168}, { 0.961280485811320641748659653}, + {-0.961280485811320641748659653}, { 0.275571819310958163076425168}, + { 0.952375012719765858529893608}, { 0.304929229735402406490728633}, + {-0.304929229735402406490728633}, { 0.952375012719765858529893608}, + { 0.457813303598877221904961155}, { 0.889048355854664562540777729}, + {-0.889048355854664562540777729}, { 0.457813303598877221904961155}, + { 0.763188417263381271704838297}, { 0.646176012983316364832802220}, + {-0.646176012983316364832802220}, { 0.763188417263381271704838297}, + { 0.082740264549375693111987083}, { 0.996571145790554847093566910}, + {-0.996571145790554847093566910}, { 0.082740264549375693111987083}, + { 0.997925286198596012623025462}, { 0.064382630929857460819324537}, + {-0.064382630929857460819324537}, { 0.997925286198596012623025462}, + { 0.660114342067420478559490747}, { 0.751165131909686411205819422}, + {-0.751165131909686411205819422}, { 0.660114342067420478559490747}, + { 0.897324580705418281231391836}, { 0.441371268731716692879988968}, + {-0.441371268731716692879988968}, { 0.897324580705418281231391836}, + { 0.322407678801069848384807478}, { 0.946600913083283570044599823}, + {-0.946600913083283570044599823}, { 0.322407678801069848384807478}, + { 0.966190003445412555433832961}, { 0.257831102162159005614471295}, + {-0.257831102162159005614471295}, { 0.966190003445412555433832961}, + { 0.500885382611240786241285004}, { 
0.865513624090569082825488358}, + {-0.865513624090569082825488358}, { 0.500885382611240786241285004}, + { 0.793975477554337164895083757}, { 0.607949784967773667243642671}, + {-0.607949784967773667243642671}, { 0.793975477554337164895083757}, + { 0.131540028702883111103387493}, { 0.991310859846115418957349799}, + {-0.991310859846115418957349799}, { 0.131540028702883111103387493}, + { 0.986809401814185476970235952}, { 0.161886393780111837641387995}, + {-0.161886393780111837641387995}, { 0.986809401814185476970235952}, + { 0.583308652937698294392830961}, { 0.812250586585203913049744181}, + {-0.812250586585203913049744181}, { 0.583308652937698294392830961}, + { 0.849741768000852489471268395}, { 0.527199134781901348464274575}, + {-0.527199134781901348464274575}, { 0.849741768000852489471268395}, + { 0.228072083170885739254457379}, { 0.973644249650811925318383912}, + {-0.973644249650811925318383912}, { 0.228072083170885739254457379}, + { 0.936265667170278246576310996}, { 0.351292756085567125601307623}, + {-0.351292756085567125601307623}, { 0.936265667170278246576310996}, + { 0.413638312238434547471944324}, { 0.910441292258067196934095369}, + {-0.910441292258067196934095369}, { 0.413638312238434547471944324}, + { 0.730562769227827561177758850}, { 0.682845546385248068164596123}, + {-0.682845546385248068164596123}, { 0.730562769227827561177758850}, + { 0.033741171851377584833716112}, { 0.999430604555461772019008327}, + {-0.999430604555461772019008327}, { 0.033741171851377584833716112}, + { 0.999204758618363895492950001}, { 0.039872927587739811128578738}, + {-0.039872927587739811128578738}, { 0.999204758618363895492950001}, + { 0.678350043129861486873655042}, { 0.734738878095963464563223604}, + {-0.734738878095963464563223604}, { 0.678350043129861486873655042}, + { 0.907886116487666212038681480}, { 0.419216888363223956433010020}, + {-0.419216888363223956433010020}, { 0.907886116487666212038681480}, + { 0.345541324963989065539191723}, { 0.938403534063108112192420774}, + 
{-0.938403534063108112192420774}, { 0.345541324963989065539191723}, + { 0.972226497078936305708321144}, { 0.234041958583543423191242045}, + {-0.234041958583543423191242045}, { 0.972226497078936305708321144}, + { 0.521975292937154342694258318}, { 0.852960604930363657746588082}, + {-0.852960604930363657746588082}, { 0.521975292937154342694258318}, + { 0.808656181588174991946968128}, { 0.588281548222645304786439813}, + {-0.588281548222645304786439813}, { 0.808656181588174991946968128}, + { 0.155828397654265235743101486}, { 0.987784141644572154230969032}, + {-0.987784141644572154230969032}, { 0.155828397654265235743101486}, + { 0.990485084256457037998682243}, { 0.137620121586486044948441663}, + {-0.137620121586486044948441663}, { 0.990485084256457037998682243}, + { 0.603066598540348201693430617}, { 0.797690840943391108362662755}, + {-0.797690840943391108362662755}, { 0.603066598540348201693430617}, + { 0.862423956111040538690933878}, { 0.506186645345155291048942344}, + {-0.506186645345155291048942344}, { 0.862423956111040538690933878}, + { 0.251897818154216950498106628}, { 0.967753837093475465243391912}, + {-0.967753837093475465243391912}, { 0.251897818154216950498106628}, + { 0.944604837261480265659265493}, { 0.328209843579092526107916817}, + {-0.328209843579092526107916817}, { 0.944604837261480265659265493}, + { 0.435857079922255491032544080}, { 0.900015892016160228714535267}, + {-0.900015892016160228714535267}, { 0.435857079922255491032544080}, + { 0.747100605980180144323078847}, { 0.664710978203344868130324985}, + {-0.664710978203344868130324985}, { 0.747100605980180144323078847}, + { 0.058258264500435759613979782}, { 0.998301544933892840738782163}, + {-0.998301544933892840738782163}, { 0.058258264500435759613979782}, + { 0.996044700901251989887944810}, { 0.088853552582524596561586535}, + {-0.088853552582524596561586535}, { 0.996044700901251989887944810}, + { 0.641481012808583151988739898}, { 0.767138911935820381181694573}, + {-0.767138911935820381181694573}, { 
0.641481012808583151988739898}, + { 0.886222530148880631647990821}, { 0.463259783551860197390719637}, + {-0.463259783551860197390719637}, { 0.886222530148880631647990821}, + { 0.299079826308040476750336973}, { 0.954228095109105629780430732}, + {-0.954228095109105629780430732}, { 0.299079826308040476750336973}, + { 0.959571513081984528335528181}, { 0.281464937925757984095231007}, + {-0.281464937925757984095231007}, { 0.959571513081984528335528181}, + { 0.479493757660153026679839798}, { 0.877545290207261291668470750}, + {-0.877545290207261291668470750}, { 0.479493757660153026679839798}, + { 0.778816512381475953374724325}, { 0.627251815495144113509622565}, + {-0.627251815495144113509622565}, { 0.778816512381475953374724325}, + { 0.107172424956808849175529148}, { 0.994240449453187946358413442}, + {-0.994240449453187946358413442}, { 0.107172424956808849175529148}, + { 0.982539302287441255907040396}, { 0.186055151663446648105438304}, + {-0.186055151663446648105438304}, { 0.982539302287441255907040396}, + { 0.563199344013834115007363772}, { 0.826321062845663480311195452}, + {-0.826321062845663480311195452}, { 0.563199344013834115007363772}, + { 0.836547727223511984524285790}, { 0.547894059173100165608820571}, + {-0.547894059173100165608820571}, { 0.836547727223511984524285790}, + { 0.204108966092816874181696950}, { 0.978948175319062194715480124}, + {-0.978948175319062194715480124}, { 0.204108966092816874181696950}, + { 0.927362525650401087274536959}, { 0.374164062971457997104393020}, + {-0.374164062971457997104393020}, { 0.927362525650401087274536959}, + { 0.391170384302253888687512949}, { 0.920318276709110566440076541}, + {-0.920318276709110566440076541}, { 0.391170384302253888687512949}, + { 0.713584868780793592903125099}, { 0.700568793943248366792866380}, + {-0.700568793943248366792866380}, { 0.713584868780793592903125099}, + { 0.009203754782059819315102378}, { 0.999957644551963866333120920}, + {-0.999957644551963866333120920}, { 0.009203754782059819315102378}, + { 
0.999957644551963866333120920}, { 0.009203754782059819315102378}, + {-0.009203754782059819315102378}, { 0.999957644551963866333120920}, + { 0.700568793943248366792866380}, { 0.713584868780793592903125099}, + {-0.713584868780793592903125099}, { 0.700568793943248366792866380}, + { 0.920318276709110566440076541}, { 0.391170384302253888687512949}, + {-0.391170384302253888687512949}, { 0.920318276709110566440076541}, + { 0.374164062971457997104393020}, { 0.927362525650401087274536959}, + {-0.927362525650401087274536959}, { 0.374164062971457997104393020}, + { 0.978948175319062194715480124}, { 0.204108966092816874181696950}, + {-0.204108966092816874181696950}, { 0.978948175319062194715480124}, + { 0.547894059173100165608820571}, { 0.836547727223511984524285790}, + {-0.836547727223511984524285790}, { 0.547894059173100165608820571}, + { 0.826321062845663480311195452}, { 0.563199344013834115007363772}, + {-0.563199344013834115007363772}, { 0.826321062845663480311195452}, + { 0.186055151663446648105438304}, { 0.982539302287441255907040396}, + {-0.982539302287441255907040396}, { 0.186055151663446648105438304}, + { 0.994240449453187946358413442}, { 0.107172424956808849175529148}, + {-0.107172424956808849175529148}, { 0.994240449453187946358413442}, + { 0.627251815495144113509622565}, { 0.778816512381475953374724325}, + {-0.778816512381475953374724325}, { 0.627251815495144113509622565}, + { 0.877545290207261291668470750}, { 0.479493757660153026679839798}, + {-0.479493757660153026679839798}, { 0.877545290207261291668470750}, + { 0.281464937925757984095231007}, { 0.959571513081984528335528181}, + {-0.959571513081984528335528181}, { 0.281464937925757984095231007}, + { 0.954228095109105629780430732}, { 0.299079826308040476750336973}, + {-0.299079826308040476750336973}, { 0.954228095109105629780430732}, + { 0.463259783551860197390719637}, { 0.886222530148880631647990821}, + {-0.886222530148880631647990821}, { 0.463259783551860197390719637}, + { 0.767138911935820381181694573}, { 
0.641481012808583151988739898}, + {-0.641481012808583151988739898}, { 0.767138911935820381181694573}, + { 0.088853552582524596561586535}, { 0.996044700901251989887944810}, + {-0.996044700901251989887944810}, { 0.088853552582524596561586535}, + { 0.998301544933892840738782163}, { 0.058258264500435759613979782}, + {-0.058258264500435759613979782}, { 0.998301544933892840738782163}, + { 0.664710978203344868130324985}, { 0.747100605980180144323078847}, + {-0.747100605980180144323078847}, { 0.664710978203344868130324985}, + { 0.900015892016160228714535267}, { 0.435857079922255491032544080}, + {-0.435857079922255491032544080}, { 0.900015892016160228714535267}, + { 0.328209843579092526107916817}, { 0.944604837261480265659265493}, + {-0.944604837261480265659265493}, { 0.328209843579092526107916817}, + { 0.967753837093475465243391912}, { 0.251897818154216950498106628}, + {-0.251897818154216950498106628}, { 0.967753837093475465243391912}, + { 0.506186645345155291048942344}, { 0.862423956111040538690933878}, + {-0.862423956111040538690933878}, { 0.506186645345155291048942344}, + { 0.797690840943391108362662755}, { 0.603066598540348201693430617}, + {-0.603066598540348201693430617}, { 0.797690840943391108362662755}, + { 0.137620121586486044948441663}, { 0.990485084256457037998682243}, + {-0.990485084256457037998682243}, { 0.137620121586486044948441663}, + { 0.987784141644572154230969032}, { 0.155828397654265235743101486}, + {-0.155828397654265235743101486}, { 0.987784141644572154230969032}, + { 0.588281548222645304786439813}, { 0.808656181588174991946968128}, + {-0.808656181588174991946968128}, { 0.588281548222645304786439813}, + { 0.852960604930363657746588082}, { 0.521975292937154342694258318}, + {-0.521975292937154342694258318}, { 0.852960604930363657746588082}, + { 0.234041958583543423191242045}, { 0.972226497078936305708321144}, + {-0.972226497078936305708321144}, { 0.234041958583543423191242045}, + { 0.938403534063108112192420774}, { 0.345541324963989065539191723}, + 
{-0.345541324963989065539191723}, { 0.938403534063108112192420774}, + { 0.419216888363223956433010020}, { 0.907886116487666212038681480}, + {-0.907886116487666212038681480}, { 0.419216888363223956433010020}, + { 0.734738878095963464563223604}, { 0.678350043129861486873655042}, + {-0.678350043129861486873655042}, { 0.734738878095963464563223604}, + { 0.039872927587739811128578738}, { 0.999204758618363895492950001}, + {-0.999204758618363895492950001}, { 0.039872927587739811128578738}, + { 0.999430604555461772019008327}, { 0.033741171851377584833716112}, + {-0.033741171851377584833716112}, { 0.999430604555461772019008327}, + { 0.682845546385248068164596123}, { 0.730562769227827561177758850}, + {-0.730562769227827561177758850}, { 0.682845546385248068164596123}, + { 0.910441292258067196934095369}, { 0.413638312238434547471944324}, + {-0.413638312238434547471944324}, { 0.910441292258067196934095369}, + { 0.351292756085567125601307623}, { 0.936265667170278246576310996}, + {-0.936265667170278246576310996}, { 0.351292756085567125601307623}, + { 0.973644249650811925318383912}, { 0.228072083170885739254457379}, + {-0.228072083170885739254457379}, { 0.973644249650811925318383912}, + { 0.527199134781901348464274575}, { 0.849741768000852489471268395}, + {-0.849741768000852489471268395}, { 0.527199134781901348464274575}, + { 0.812250586585203913049744181}, { 0.583308652937698294392830961}, + {-0.583308652937698294392830961}, { 0.812250586585203913049744181}, + { 0.161886393780111837641387995}, { 0.986809401814185476970235952}, + {-0.986809401814185476970235952}, { 0.161886393780111837641387995}, + { 0.991310859846115418957349799}, { 0.131540028702883111103387493}, + {-0.131540028702883111103387493}, { 0.991310859846115418957349799}, + { 0.607949784967773667243642671}, { 0.793975477554337164895083757}, + {-0.793975477554337164895083757}, { 0.607949784967773667243642671}, + { 0.865513624090569082825488358}, { 0.500885382611240786241285004}, + {-0.500885382611240786241285004}, { 
0.865513624090569082825488358}, + { 0.257831102162159005614471295}, { 0.966190003445412555433832961}, + {-0.966190003445412555433832961}, { 0.257831102162159005614471295}, + { 0.946600913083283570044599823}, { 0.322407678801069848384807478}, + {-0.322407678801069848384807478}, { 0.946600913083283570044599823}, + { 0.441371268731716692879988968}, { 0.897324580705418281231391836}, + {-0.897324580705418281231391836}, { 0.441371268731716692879988968}, + { 0.751165131909686411205819422}, { 0.660114342067420478559490747}, + {-0.660114342067420478559490747}, { 0.751165131909686411205819422}, + { 0.064382630929857460819324537}, { 0.997925286198596012623025462}, + {-0.997925286198596012623025462}, { 0.064382630929857460819324537}, + { 0.996571145790554847093566910}, { 0.082740264549375693111987083}, + {-0.082740264549375693111987083}, { 0.996571145790554847093566910}, + { 0.646176012983316364832802220}, { 0.763188417263381271704838297}, + {-0.763188417263381271704838297}, { 0.646176012983316364832802220}, + { 0.889048355854664562540777729}, { 0.457813303598877221904961155}, + {-0.457813303598877221904961155}, { 0.889048355854664562540777729}, + { 0.304929229735402406490728633}, { 0.952375012719765858529893608}, + {-0.952375012719765858529893608}, { 0.304929229735402406490728633}, + { 0.961280485811320641748659653}, { 0.275571819310958163076425168}, + {-0.275571819310958163076425168}, { 0.961280485811320641748659653}, + { 0.484869248000791101822951699}, { 0.874586652278176112634431897}, + {-0.874586652278176112634431897}, { 0.484869248000791101822951699}, + { 0.782650596166575738458949301}, { 0.622461279374149972519166721}, + {-0.622461279374149972519166721}, { 0.782650596166575738458949301}, + { 0.113270952177564349018228733}, { 0.993564135520595333782021697}, + {-0.993564135520595333782021697}, { 0.113270952177564349018228733}, + { 0.983662419211730274396237776}, { 0.180022901405699522679906590}, + {-0.180022901405699522679906590}, { 0.983662419211730274396237776}, + { 
0.568258952670131549790548489}, { 0.822849781375826332046780034}, + {-0.822849781375826332046780034}, { 0.568258952670131549790548489}, + { 0.839893794195999504583383987}, { 0.542750784864515906586768661}, + {-0.542750784864515906586768661}, { 0.839893794195999504583383987}, + { 0.210111836880469621717489972}, { 0.977677357824509979943404762}, + {-0.977677357824509979943404762}, { 0.210111836880469621717489972}, + { 0.929640895843181265457918066}, { 0.368466829953372331712746222}, + {-0.368466829953372331712746222}, { 0.929640895843181265457918066}, + { 0.396809987416710328595290911}, { 0.917900775621390457642276297}, + {-0.917900775621390457642276297}, { 0.396809987416710328595290911}, + { 0.717870045055731736211325329}, { 0.696177131491462944788582591}, + {-0.696177131491462944788582591}, { 0.717870045055731736211325329}, + { 0.015339206284988101044151868}, { 0.999882347454212525633049627}, + {-0.999882347454212525633049627}, { 0.015339206284988101044151868}, + { 0.999769405351215321657617036}, { 0.021474080275469507418374898}, + {-0.021474080275469507418374898}, { 0.999769405351215321657617036}, + { 0.691759258364157774906734132}, { 0.722128193929215321243607198}, + {-0.722128193929215321243607198}, { 0.691759258364157774906734132}, + { 0.915448716088267819566431292}, { 0.402434650859418441082533934}, + {-0.402434650859418441082533934}, { 0.915448716088267819566431292}, + { 0.362755724367397216204854462}, { 0.931884265581668106718557199}, + {-0.931884265581668106718557199}, { 0.362755724367397216204854462}, + { 0.976369731330021149312732194}, { 0.216106797076219509948385131}, + {-0.216106797076219509948385131}, { 0.976369731330021149312732194}, + { 0.537587076295645482502214932}, { 0.843208239641845437161743865}, + {-0.843208239641845437161743865}, { 0.537587076295645482502214932}, + { 0.819347520076796960824689637}, { 0.573297166698042212820171239}, + {-0.573297166698042212820171239}, { 0.819347520076796960824689637}, + { 0.173983873387463827950700807}, { 
0.984748501801904218556553176}, + {-0.984748501801904218556553176}, { 0.173983873387463827950700807}, + { 0.992850414459865090793563344}, { 0.119365214810991364593637790}, + {-0.119365214810991364593637790}, { 0.992850414459865090793563344}, + { 0.617647307937803932403979402}, { 0.786455213599085757522319464}, + {-0.786455213599085757522319464}, { 0.617647307937803932403979402}, + { 0.871595086655951034842481435}, { 0.490226483288291154229598449}, + {-0.490226483288291154229598449}, { 0.871595086655951034842481435}, + { 0.269668325572915106525464462}, { 0.962953266873683886347921481}, + {-0.962953266873683886347921481}, { 0.269668325572915106525464462}, + { 0.950486073949481721759926101}, { 0.310767152749611495835997250}, + {-0.310767152749611495835997250}, { 0.950486073949481721759926101}, + { 0.452349587233770874133026703}, { 0.891840709392342727796478697}, + {-0.891840709392342727796478697}, { 0.452349587233770874133026703}, + { 0.759209188978388033485525443}, { 0.650846684996380915068975573}, + {-0.650846684996380915068975573}, { 0.759209188978388033485525443}, + { 0.076623861392031492278332463}, { 0.997060070339482978987989949}, + {-0.997060070339482978987989949}, { 0.076623861392031492278332463}, + { 0.997511456140303459699448390}, { 0.070504573389613863027351471}, + {-0.070504573389613863027351471}, { 0.997511456140303459699448390}, + { 0.655492852999615385312679701}, { 0.755201376896536527598710756}, + {-0.755201376896536527598710756}, { 0.655492852999615385312679701}, + { 0.894599485631382678433072126}, { 0.446868840162374195353044389}, + {-0.446868840162374195353044389}, { 0.894599485631382678433072126}, + { 0.316593375556165867243047035}, { 0.948561349915730288158494826}, + {-0.948561349915730288158494826}, { 0.316593375556165867243047035}, + { 0.964589793289812723836432159}, { 0.263754678974831383611349322}, + {-0.263754678974831383611349322}, { 0.964589793289812723836432159}, + { 0.495565261825772531150266670}, { 0.868570705971340895340449876}, + 
{-0.868570705971340895340449876}, { 0.495565261825772531150266670}, + { 0.790230221437310055030217152}, { 0.612810082429409703935211936}, + {-0.612810082429409703935211936}, { 0.790230221437310055030217152}, + { 0.125454983411546238542336453}, { 0.992099313142191757112085445}, + {-0.992099313142191757112085445}, { 0.125454983411546238542336453}, + { 0.985797509167567424700995000}, { 0.167938294974731178054745536}, + {-0.167938294974731178054745536}, { 0.985797509167567424700995000}, + { 0.578313796411655563342245019}, { 0.815814410806733789010772660}, + {-0.815814410806733789010772660}, { 0.578313796411655563342245019}, + { 0.846490938774052078300544488}, { 0.532403127877197971442805218}, + {-0.532403127877197971442805218}, { 0.846490938774052078300544488}, + { 0.222093620973203534094094721}, { 0.975025345066994146844913468}, + {-0.975025345066994146844913468}, { 0.222093620973203534094094721}, + { 0.934092550404258914729877883}, { 0.357030961233430032614954036}, + {-0.357030961233430032614954036}, { 0.934092550404258914729877883}, + { 0.408044162864978680820747499}, { 0.912962190428398164628018233}, + {-0.912962190428398164628018233}, { 0.408044162864978680820747499}, + { 0.726359155084345976817494315}, { 0.687315340891759108199186948}, + {-0.687315340891759108199186948}, { 0.726359155084345976817494315}, + { 0.027608145778965741612354872}, { 0.999618822495178597116830637}, + {-0.999618822495178597116830637}, { 0.027608145778965741612354872}, + { 0.998941293186856850633930266}, { 0.046003182130914628814301788}, + {-0.046003182130914628814301788}, { 0.998941293186856850633930266}, + { 0.673829000378756060917568372}, { 0.738887324460615147933116508}, + {-0.738887324460615147933116508}, { 0.673829000378756060917568372}, + { 0.905296759318118774354048329}, { 0.424779681209108833357226189}, + {-0.424779681209108833357226189}, { 0.905296759318118774354048329}, + { 0.339776884406826857828825803}, { 0.940506070593268323787291309}, + {-0.940506070593268323787291309}, { 
0.339776884406826857828825803}, + { 0.970772140728950302138169611}, { 0.240003022448741486568922365}, + {-0.240003022448741486568922365}, { 0.970772140728950302138169611}, + { 0.516731799017649881508753876}, { 0.856147328375194481019630732}, + {-0.856147328375194481019630732}, { 0.516731799017649881508753876}, + { 0.805031331142963597922659282}, { 0.593232295039799808047809426}, + {-0.593232295039799808047809426}, { 0.805031331142963597922659282}, + { 0.149764534677321517229695737}, { 0.988721691960323767604516485}, + {-0.988721691960323767604516485}, { 0.149764534677321517229695737}, + { 0.989622017463200834623694454}, { 0.143695033150294454819773349}, + {-0.143695033150294454819773349}, { 0.989622017463200834623694454}, + { 0.598160706996342311724958652}, { 0.801376171723140219430247777}, + {-0.801376171723140219430247777}, { 0.598160706996342311724958652}, + { 0.859301818357008404783582139}, { 0.511468850437970399504391001}, + {-0.511468850437970399504391001}, { 0.859301818357008404783582139}, + { 0.245955050335794611599924709}, { 0.969281235356548486048290738}, + {-0.969281235356548486048290738}, { 0.245955050335794611599924709}, + { 0.942573197601446879280758735}, { 0.333999651442009404650865481}, + {-0.333999651442009404650865481}, { 0.942573197601446879280758735}, + { 0.430326481340082633908199031}, { 0.902673318237258806751502391}, + {-0.902673318237258806751502391}, { 0.430326481340082633908199031}, + { 0.743007952135121693517362293}, { 0.669282588346636065720696366}, + {-0.669282588346636065720696366}, { 0.743007952135121693517362293}, + { 0.052131704680283321236358216}, { 0.998640218180265222418199049}, + {-0.998640218180265222418199049}, { 0.052131704680283321236358216}, + { 0.995480755491926941769171600}, { 0.094963495329638998938034312}, + {-0.094963495329638998938034312}, { 0.995480755491926941769171600}, + { 0.636761861236284230413943435}, { 0.771060524261813773200605759}, + {-0.771060524261813773200605759}, { 0.636761861236284230413943435}, + { 
0.883363338665731594736308015}, { 0.468688822035827933697617870}, + {-0.468688822035827933697617870}, { 0.883363338665731594736308015}, + { 0.293219162694258650606608599}, { 0.956045251349996443270479823}, + {-0.956045251349996443270479823}, { 0.293219162694258650606608599}, + { 0.957826413027532890321037029}, { 0.287347459544729526477331841}, + {-0.287347459544729526477331841}, { 0.957826413027532890321037029}, + { 0.474100214650550014398580015}, { 0.880470889052160770806542929}, + {-0.880470889052160770806542929}, { 0.474100214650550014398580015}, + { 0.774953106594873878359129282}, { 0.632018735939809021909403706}, + {-0.632018735939809021909403706}, { 0.774953106594873878359129282}, + { 0.101069862754827824987887585}, { 0.994879330794805620591166107}, + {-0.994879330794805620591166107}, { 0.101069862754827824987887585}, + { 0.981379193313754574318224190}, { 0.192080397049892441679288205}, + {-0.192080397049892441679288205}, { 0.981379193313754574318224190}, + { 0.558118531220556115693702964}, { 0.829761233794523042469023765}, + {-0.829761233794523042469023765}, { 0.558118531220556115693702964}, + { 0.833170164701913186439915922}, { 0.553016705580027531764226988}, + {-0.553016705580027531764226988}, { 0.833170164701913186439915922}, + { 0.198098410717953586179324918}, { 0.980182135968117392690210009}, + {-0.980182135968117392690210009}, { 0.198098410717953586179324918}, + { 0.925049240782677590302371869}, { 0.379847208924051170576281147}, + {-0.379847208924051170576281147}, { 0.925049240782677590302371869}, + { 0.385516053843918864075607949}, { 0.922701128333878570437264227}, + {-0.922701128333878570437264227}, { 0.385516053843918864075607949}, + { 0.709272826438865651316533772}, { 0.704934080375904908852523758}, + {-0.704934080375904908852523758}, { 0.709272826438865651316533772}, + { 0.003067956762965976270145365}, { 0.999995293809576171511580126}, + {-0.999995293809576171511580126}, { 0.003067956762965976270145365} +}; + +const fpr fpr_p2_tab[] = { + { 
2.00000000000 }, + { 1.00000000000 }, + { 0.50000000000 }, + { 0.25000000000 }, + { 0.12500000000 }, + { 0.06250000000 }, + { 0.03125000000 }, + { 0.01562500000 }, + { 0.00781250000 }, + { 0.00390625000 }, + { 0.00195312500 } +}; diff --git a/crypto_sign/falcon/falcon-1024/avx2/fpr.h b/crypto_sign/falcon/falcon-1024/avx2/fpr.h new file mode 100644 index 00000000..618115a3 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/fpr.h @@ -0,0 +1,349 @@ +#ifndef PQCLEAN_FALCON1024_AVX2_FPR_H +#define PQCLEAN_FALCON1024_AVX2_FPR_H + +/* + * Floating-point operations. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ====================================================================== */ + +#include +#include + +#define FMADD(a, b, c) _mm256_add_pd(_mm256_mul_pd(a, b), c) +#define FMSUB(a, b, c) _mm256_sub_pd(_mm256_mul_pd(a, b), c) + +/* + * We wrap the native 'double' type into a structure so that the C compiler + * complains if we inadvertently use raw arithmetic operators on the 'fpr' + * type instead of using the inline functions below. This should have no + * extra runtime cost, since all the functions below are 'inline'. + */ +typedef struct { + double v; +} fpr; + +static inline fpr +FPR(double v) { + fpr x; + + x.v = v; + return x; +} + +static inline fpr +fpr_of(int64_t i) { + return FPR((double)i); +} + +static const fpr fpr_q = { 12289.0 }; +static const fpr fpr_inverse_of_q = { 1.0 / 12289.0 }; +static const fpr fpr_inv_2sqrsigma0 = { .150865048875372721532312163019 }; +static const fpr fpr_inv_sigma = { .005819826392951607426919370871 }; +static const fpr fpr_sigma_min_9 = { 1.291500756233514568549480827642 }; +static const fpr fpr_sigma_min_10 = { 1.311734375905083682667395805765 }; +static const fpr fpr_log2 = { 0.69314718055994530941723212146 }; +static const fpr fpr_inv_log2 = { 1.4426950408889634073599246810 }; +static const fpr fpr_bnorm_max = { 16822.4121 }; +static const fpr fpr_zero = { 0.0 }; +static const fpr fpr_one = { 1.0 }; +static const fpr fpr_two = { 2.0 }; +static const fpr fpr_onehalf = { 0.5 }; +static const fpr fpr_invsqrt2 = { 0.707106781186547524400844362105 }; +static const fpr fpr_invsqrt8 = { 0.353553390593273762200422181052 }; +static const fpr fpr_ptwo31 = { 2147483648.0 }; +static const fpr fpr_ptwo31m1 = { 2147483647.0 }; +static const fpr fpr_mtwo31m1 = { -2147483647.0 }; +static const fpr fpr_ptwo63m1 = { 9223372036854775807.0 }; +static const fpr fpr_mtwo63m1 = { -9223372036854775807.0 }; +static const fpr 
fpr_ptwo63 = { 9223372036854775808.0 }; + +static inline int64_t +fpr_rint(fpr x) { + /* + * We do not want to use llrint() since it might be not + * constant-time. + * + * Suppose that x >= 0. If x >= 2^52, then it is already an + * integer. Otherwise, if x < 2^52, then computing x+2^52 will + * yield a value that will be rounded to the nearest integer + * with exactly the right rules (round-to-nearest-even). + * + * In order to have constant-time processing, we must do the + * computation for both x >= 0 and x < 0 cases, and use a + * cast to an integer to access the sign and select the proper + * value. Such casts also allow us to find out if |x| < 2^52. + */ + int64_t sx, tx, rp, rn, m; + uint32_t ub; + + sx = (int64_t)(x.v - 1.0); + tx = (int64_t)x.v; + rp = (int64_t)(x.v + 4503599627370496.0) - 4503599627370496; + rn = (int64_t)(x.v - 4503599627370496.0) + 4503599627370496; + + /* + * If tx >= 2^52 or tx < -2^52, then result is tx. + * Otherwise, if sx >= 0, then result is rp. + * Otherwise, result is rn. We use the fact that when x is + * close to 0 (|x| <= 0.25) then both rp and rn are correct; + * and if x is not close to 0, then trunc(x-1.0) yields the + * appropriate sign. + */ + + /* + * Clamp rp to zero if sx < 0. + * Clamp rn to zero if sx >= 0. + */ + m = sx >> 63; + rn &= m; + rp &= ~m; + + /* + * Get the 12 upper bits of tx; if they are not all zeros or + * all ones, then tx >= 2^52 or tx < -2^52, and we clamp both + * rp and rn to zero. Otherwise, we clamp tx to zero. + */ + ub = (uint32_t)((uint64_t)tx >> 52); + m = -(int64_t)((((ub + 1) & 0xFFF) - 2) >> 31); + rp &= m; + rn &= m; + tx &= ~m; + + /* + * Only one of tx, rn or rp (at most) can be non-zero at this + * point. + */ + return tx | rn | rp; +} + +static inline int64_t +fpr_floor(fpr x) { + int64_t r; + + /* + * The cast performs a trunc() (rounding toward 0) and thus is + * wrong by 1 for most negative values.
The correction below is + * constant-time as long as the compiler turns the + * floating-point conversion result into a 0/1 integer without a + * conditional branch or another non-constant-time construction. + * This should hold on all modern architectures with an FPU (and + * if it is false on a given arch, then chances are that the FPU + * itself is not constant-time, making the point moot). + */ + r = (int64_t)x.v; + return r - (x.v < (double)r); +} + +static inline int64_t +fpr_trunc(fpr x) { + return (int64_t)x.v; +} + +static inline fpr +fpr_add(fpr x, fpr y) { + return FPR(x.v + y.v); +} + +static inline fpr +fpr_sub(fpr x, fpr y) { + return FPR(x.v - y.v); +} + +static inline fpr +fpr_neg(fpr x) { + return FPR(-x.v); +} + +static inline fpr +fpr_half(fpr x) { + return FPR(x.v * 0.5); +} + +static inline fpr +fpr_double(fpr x) { + return FPR(x.v + x.v); +} + +static inline fpr +fpr_mul(fpr x, fpr y) { + return FPR(x.v * y.v); +} + +static inline fpr +fpr_sqr(fpr x) { + return FPR(x.v * x.v); +} + +static inline fpr +fpr_inv(fpr x) { + return FPR(1.0 / x.v); +} + +static inline fpr +fpr_div(fpr x, fpr y) { + return FPR(x.v / y.v); +} + +static inline void +fpr_sqrt_avx2(double *t) { + __m128d x; + + x = _mm_load1_pd(t); + x = _mm_sqrt_pd(x); + _mm_storel_pd(t, x); +} + +static inline fpr +fpr_sqrt(fpr x) { + /* + * We prefer not to have a dependency on libm when it can be + * avoided. On x86, calling the sqrt() libm function inlines + * the relevant opcode (fsqrt or sqrtsd, depending on whether + * the 387 FPU or SSE2 is used for floating-point operations) + * but then makes an optional call to the library function + * for proper error handling, in case the operand is negative. + * + * To avoid this dependency, we use intrinsics or inline assembly + * on recognized platforms: + * + * - If AVX2 is explicitly enabled, then we use SSE2 intrinsics. + * + * - On GCC/Clang with SSE maths, we use SSE2 intrinsics.
+ * + * - On GCC/Clang on i386, or MSVC on i386, we use inline assembly + * to call the 387 FPU fsqrt opcode. + * + * - On GCC/Clang/XLC on PowerPC, we use inline assembly to call + * the fsqrt opcode (Clang needs a special hack). + * + * - On GCC/Clang on ARM with hardware floating-point, we use + * inline assembly to call the vsqrt.f64 opcode. Due to a + * complex ecosystem of compilers and assembly syntaxes, we + * have to call it "fsqrt" or "fsqrtd", depending on case. + * + * If the platform is not recognized, a call to the system + * library function sqrt() is performed. On some compilers, this + * may actually inline the relevant opcode, and call the library + * function only when the input is invalid (e.g. negative); + * Falcon never actually calls sqrt() on a negative value, but + * the dependency on libm will still be there. + */ + + fpr_sqrt_avx2(&x.v); + return x; +} + +static inline int +fpr_lt(fpr x, fpr y) { + return x.v < y.v; +} + +static inline uint64_t +fpr_expm_p63(fpr x, fpr ccs) { + /* + * Polynomial approximation of exp(-x) is taken from FACCT: + * https://eprint.iacr.org/2018/1234 + * Specifically, values are extracted from the implementation + * referenced from the FACCT article, and available at: + * https://github.com/raykzhao/gaussian + * Tests over more than 24 billion random inputs in the + * 0..log(2) range have never shown a deviation larger than + * 2^(-50) from the true mathematical value. + */ + + + /* + * AVX2 implementation uses more operations than Horner's method, + * but with a lower expression tree depth. This helps because + * additions and multiplications have a latency of 4 cycles on + * a Skylake, but the CPU can issue two of them per cycle.
+ */ + + static const union { + double d[12]; + __m256d v[3]; + } c = { + { + 0.999999999999994892974086724280, + 0.500000000000019206858326015208, + 0.166666666666984014666397229121, + 0.041666666666110491190622155955, + 0.008333333327800835146903501993, + 0.001388888894063186997887560103, + 0.000198412739277311890541063977, + 0.000024801566833585381209939524, + 0.000002755586350219122514855659, + 0.000000275607356160477811864927, + 0.000000025299506379442070029551, + 0.000000002073772366009083061987 + } + }; + + double d1, d2, d4, d8, y; + __m256d d14, d58, d9c; + + d1 = -x.v; + d2 = d1 * d1; + d4 = d2 * d2; + d8 = d4 * d4; + d14 = _mm256_set_pd(d4, d2 * d1, d2, d1); + d58 = _mm256_mul_pd(d14, _mm256_set1_pd(d4)); + d9c = _mm256_mul_pd(d14, _mm256_set1_pd(d8)); + d14 = _mm256_mul_pd(d14, _mm256_loadu_pd(&c.d[0])); + d58 = FMADD(d58, _mm256_loadu_pd(&c.d[4]), d14); + d9c = FMADD(d9c, _mm256_loadu_pd(&c.d[8]), d58); + d9c = _mm256_hadd_pd(d9c, d9c); + y = 1.0 + _mm_cvtsd_f64(_mm256_castpd256_pd128(d9c)) // _mm256_cvtsd_f64(d9c) + + _mm_cvtsd_f64(_mm256_extractf128_pd(d9c, 1)); + y *= ccs.v; + + /* + * Final conversion goes through int64_t first, because that's what + * the underlying opcode (vcvttsd2si) will do, and we know that the + * result will fit, since x >= 0 and ccs < 1. If we did the + * conversion directly to uint64_t, then the compiler would add some + * extra code to cover the case of a source value of 2^63 or more, + * and though the alternate path would never be exercised, the + * extra comparison would cost us some cycles. 
+ */ + return (uint64_t)(int64_t)(y * fpr_ptwo63.v); + +} + +#define fpr_gm_tab PQCLEAN_FALCON1024_AVX2_fpr_gm_tab +extern const fpr fpr_gm_tab[]; + +#define fpr_p2_tab PQCLEAN_FALCON1024_AVX2_fpr_p2_tab +extern const fpr fpr_p2_tab[]; + +/* ====================================================================== */ +#endif diff --git a/crypto_sign/falcon/falcon-1024/avx2/inner.h b/crypto_sign/falcon/falcon-1024/avx2/inner.h new file mode 100644 index 00000000..d7239cf9 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/inner.h @@ -0,0 +1,826 @@ +#ifndef PQCLEAN_FALCON1024_AVX2_INNER_H +#define PQCLEAN_FALCON1024_AVX2_INNER_H + + +/* + * Internal functions for Falcon. This is not the API intended to be + * used by applications; instead, this internal API provides all the + * primitives on which wrappers build to provide external APIs. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +/* + * IMPORTANT API RULES + * ------------------- + * + * This API has some non-trivial usage rules: + * + * + * - All public functions (i.e. the non-static ones) must be referenced + * with the PQCLEAN_FALCON1024_AVX2_ macro (e.g. PQCLEAN_FALCON1024_AVX2_verify_raw for the verify_raw() + * function). That macro adds a prefix to the name, which is + * configurable with the FALCON_PREFIX macro. This allows compiling + * the code into a specific "namespace" and potentially including + * several versions of this code into a single application (e.g. to + * have an AVX2 and a non-AVX2 variant and select the one to use at + * runtime based on availability of AVX2 opcodes). + * + * - Functions that need temporary buffers expect them as a final + * tmp[] array of type uint8_t*, with a size which is documented for + * each function. However, most have some alignment requirements, + * because they will use the array to store 16-bit, 32-bit or 64-bit + * values (e.g. uint64_t or double). The caller must ensure proper + * alignment. What happens on unaligned access depends on the + * underlying architecture, ranging from a slight time penalty + * to immediate termination of the process. + * + * - Some functions rely on specific rounding rules and precision for + * floating-point numbers. On some systems (in particular 32-bit x86 + * with the 387 FPU), this requires setting a hardware control + * word. 
The caller MUST use set_fpu_cw() to ensure proper precision: + * + * oldcw = set_fpu_cw(2); + * PQCLEAN_FALCON1024_AVX2_sign_dyn(...); + * set_fpu_cw(oldcw); + * + * On systems where the native floating-point precision is already + * proper, or integer-based emulation is used, the set_fpu_cw() + * function does nothing, so it can be called systematically. + */ +#include "fips202.h" +#include "fpr.h" +#include +#include +#include + +/* + * Some computations with floating-point elements, in particular + * rounding to the nearest integer, rely on operations using _exactly_ + * the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit + * x86, the 387 FPU may be used (depending on the target OS) and, in + * that case, may use more precision bits (i.e. 64 bits, for an 80-bit + * total type length); to prevent miscomputations, we define an explicit + * function that modifies the precision in the FPU control word. + * + * set_fpu_cw() sets the precision to the provided value, and returns + * the previously set precision; callers are supposed to restore the + * previous precision on exit. The correct (52-bit) precision is + * configured with the value "2". On unsupported compilers, or on + * targets other than 32-bit x86, or when the native 'double' type is + * not used, the set_fpu_cw() function does nothing at all. + */ +static inline unsigned +set_fpu_cw(unsigned x) { + return x; +} + + + + +/* ==================================================================== */ +/* + * SHAKE256 implementation (shake.c). + * + * API is defined to be easily replaced with the fips202.h API defined + * as part of PQClean. 
+ */ + + + +#define inner_shake256_context shake256incctx +#define inner_shake256_init(sc) shake256_inc_init(sc) +#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) +#define inner_shake256_flip(sc) shake256_inc_finalize(sc) +#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) +#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc) + + +/* ==================================================================== */ +/* + * Encoding/decoding functions (codec.c). + * + * Encoding functions take as parameters an output buffer (out) with + * a given maximum length (max_out_len); returned value is the actual + * number of bytes which have been written. If the output buffer is + * not large enough, then 0 is returned (some bytes may have been + * written to the buffer). If 'out' is NULL, then 'max_out_len' is + * ignored; instead, the function computes and returns the actual + * required output length (in bytes). + * + * Decoding functions take as parameters an input buffer (in) with + * its maximum length (max_in_len); returned value is the actual number + * of bytes that have been read from the buffer. If the provided length + * is too short, then 0 is returned. + * + * Values to encode or decode are vectors of integers, with N = 2^logn + * elements. + * + * Three encoding formats are defined: + * + * - modq: sequence of values modulo 12289, each encoded over exactly + * 14 bits. The encoder and decoder verify that integers are within + * the valid range (0..12288). Values are arrays of uint16. + * + * - trim: sequence of signed integers, a specified number of bits + * each. The number of bits is provided as parameter and includes + * the sign bit. Each integer x must be such that |x| < 2^(bits-1) + * (which means that the -2^(bits-1) value is forbidden); encode and + * decode functions check that property. 
Values are arrays of + * int16_t or int8_t, corresponding to names 'trim_i16' and + * 'trim_i8', respectively. + * + * - comp: variable-length encoding for signed integers; each integer + * uses a minimum of 9 bits, possibly more. This is normally used + * only for signatures. + * + */ + +size_t PQCLEAN_FALCON1024_AVX2_modq_encode(void *out, size_t max_out_len, + const uint16_t *x, unsigned logn); +size_t PQCLEAN_FALCON1024_AVX2_trim_i16_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON1024_AVX2_trim_i8_encode(void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON1024_AVX2_comp_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn); + +size_t PQCLEAN_FALCON1024_AVX2_modq_decode(uint16_t *x, unsigned logn, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON1024_AVX2_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON1024_AVX2_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON1024_AVX2_comp_decode(int16_t *x, unsigned logn, + const void *in, size_t max_in_len); + +/* + * Number of bits for key elements, indexed by logn (1 to 10). This + * is at most 8 bits for all degrees, but some degrees may have shorter + * elements. + */ +extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_fg_bits[]; +extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_FG_bits[]; + +/* + * Maximum size, in bits, of elements in a signature, indexed by logn + * (1 to 10). The size includes the sign bit. + */ +extern const uint8_t PQCLEAN_FALCON1024_AVX2_max_sig_bits[]; + +/* ==================================================================== */ +/* + * Support functions used for both signature generation and signature + * verification (common.c). + */ + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. 
This is the non-constant-time version, which may leak enough + * information to serve as a stop condition on a brute force attack on + * the hashed message (provided that the nonce value is known). + */ +void PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(inner_shake256_context *sc, + uint16_t *x, unsigned logn); + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. The temporary buffer (tmp) must have room for 2*2^logn bytes. + * This function is constant-time but is typically more expensive than + * PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(). + * + * tmp[] must have 16-bit alignment. + */ +void PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. This compares the appropriate norm of the + * vector with the acceptance bound. Returned value is 1 on success + * (vector is short enough to be acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON1024_AVX2_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. Instead of the first half s1, this + * function receives the "saturated squared norm" of s1, i.e. the + * sum of the squares of the coordinates of s1 (saturated at 2^32-1 + * if the sum exceeds 2^31-1). + * + * Returned value is 1 on success (vector is short enough to be + * acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON1024_AVX2_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); + +/* ==================================================================== */ +/* + * Signature verification functions (vrfy.c). + */ + +/* + * Convert a public key to NTT + Montgomery format. Conversion is done + * in place. 
+ */ +void PQCLEAN_FALCON1024_AVX2_to_ntt_monty(uint16_t *h, unsigned logn); + +/* + * Internal signature verification code: + * c0[] contains the hashed nonce+message + * s2[] is the decoded signature + * h[] contains the public key, in NTT + Montgomery format + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp); + +/* + * Compute the public key h[], given the private key elements f[] and + * g[]. This computes h = g/f mod phi mod q, where phi is the polynomial + * modulus. This function returns 1 on success, 0 on error (an error is + * reported if f is not invertible mod phi mod q). + * + * The tmp[] array must have room for at least 2*2^logn elements. + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_AVX2_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); + +/* + * Recompute the fourth private key element. Private key consists of + * four polynomials with small coefficients f, g, F and G, which are + * such that fG - gF = q mod phi; furthermore, f is invertible modulo + * phi and modulo q. This function recomputes G from f, g and F. + * + * The tmp[] array must have room for at least 4*2^logn bytes. + * + * Returned value is 1 on success, 0 on error (f not invertible). + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_AVX2_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp); + +/* + * Test whether a given polynomial is invertible modulo phi and q. + * Polynomial coefficients are small integers. + * + * tmp[] must have 16-bit alignment. 
+ */ +int PQCLEAN_FALCON1024_AVX2_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp); + +/* + * Count the number of elements of value zero in the NTT representation + * of the given polynomial: this is the number of primitive 2n-th roots + * of unity (modulo q = 12289) that are roots of the provided polynomial + * (taken modulo q). + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp); + +/* + * Internal signature verification with public key recovery: + * h[] receives the public key (NOT in NTT/Montgomery format) + * c0[] contains the hashed nonce+message + * s1[] is the first signature half + * s2[] is the second signature half + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. Success is returned if + * the signature is a short enough vector; in that case, the public + * key has been written to h[]. However, the caller must still + * verify that h[] is the correct value (e.g. with regard to a known + * hash of the public key). + * + * h[] may not overlap with any of the other arrays. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_AVX2_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Implementation of floating-point real numbers (fpr.h, fpr.c). + */ + +/* + * Real numbers are implemented by an extra header file (fpr.h), included at the top of this file. + * This is meant to support pluggable implementations. The default + * implementation relies on the C type 'double'. 
+ * + * The included file must define the following types, functions and + * constants: + * + * fpr + * type for a real number + * + * fpr fpr_of(int64_t i) + * cast an integer into a real number; source must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_scaled(int64_t i, int sc) + * compute i*2^sc as a real number; source 'i' must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_ldexp(fpr x, int e) + * compute x*2^e + * + * int64_t fpr_rint(fpr x) + * round x to the nearest integer; x must be in the -(2^63-1) + * to +(2^63-1) range + * + * int64_t fpr_trunc(fpr x) + * round to an integer; this rounds towards zero; value must + * be in the -(2^63-1) to +(2^63-1) range + * + * fpr fpr_add(fpr x, fpr y) + * compute x + y + * + * fpr fpr_sub(fpr x, fpr y) + * compute x - y + * + * fpr fpr_neg(fpr x) + * compute -x + * + * fpr fpr_half(fpr x) + * compute x/2 + * + * fpr fpr_double(fpr x) + * compute x*2 + * + * fpr fpr_mul(fpr x, fpr y) + * compute x * y + * + * fpr fpr_sqr(fpr x) + * compute x * x + * + * fpr fpr_inv(fpr x) + * compute 1/x + * + * fpr fpr_div(fpr x, fpr y) + * compute x/y + * + * fpr fpr_sqrt(fpr x) + * compute the square root of x + * + * int fpr_lt(fpr x, fpr y) + * return 1 if x < y, 0 otherwise + * + * uint64_t fpr_expm_p63(fpr x) + * return exp(-x), assuming that 0 <= x < log(2). Returned value + * is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), + * rounded to the nearest integer). Computation should have a + * precision of at least 45 bits. 
+ * + * const fpr fpr_gm_tab[] + * array of constants for FFT / iFFT + * + * const fpr fpr_p2_tab[] + * precomputed powers of 2 (by index, 0 to 10) + * + * Constants of type 'fpr': + * + * fpr fpr_q 12289 + * fpr fpr_inverse_of_q 1/12289 + * fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) + * fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) + * fpr fpr_sigma_min_9 1.291500756233514568549480827642 + * fpr fpr_sigma_min_10 1.311734375905083682667395805765 + * fpr fpr_log2 log(2) + * fpr fpr_inv_log2 1/log(2) + * fpr fpr_bnorm_max 16822.4121 + * fpr fpr_zero 0 + * fpr fpr_one 1 + * fpr fpr_two 2 + * fpr fpr_onehalf 0.5 + * fpr fpr_ptwo31 2^31 + * fpr fpr_ptwo31m1 2^31-1 + * fpr fpr_mtwo31m1 -(2^31-1) + * fpr fpr_ptwo63m1 2^63-1 + * fpr fpr_mtwo63m1 -(2^63-1) + * fpr fpr_ptwo63 2^63 + */ + +/* ==================================================================== */ +/* + * RNG (rng.c). + * + * A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 + * context (flipped) and is used for bulk pseudorandom generation. + * A system-dependent seed generator is also provided. + */ + +/* + * Obtain a random seed from the system RNG. + * + * Returned value is 1 on success, 0 on error. + */ +int PQCLEAN_FALCON1024_AVX2_get_seed(void *seed, size_t seed_len); + +/* + * Structure for a PRNG. This includes a large buffer so that values + * get generated in advance. The 'state' is used to keep the current + * PRNG algorithm state (contents depend on the selected algorithm). + * + * The unions with 'dummy_u64' are there to ensure proper alignment for + * 64-bit direct access. + */ +typedef struct { + union { + uint8_t d[512]; /* MUST be 512, exactly */ + uint64_t dummy_u64; + } buf; + size_t ptr; /* offset in buf.d of the next byte handed out by prng_get_u8()/prng_get_u64() */ + union { + uint8_t d[256]; + uint64_t dummy_u64; + } state; + int type; /* NOTE(review): presumably identifies the selected PRNG algorithm -- confirm in rng.c */ +} prng; + +/* + * Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 + * context (in "flipped" state) to obtain its initial state. 
+ */ +void PQCLEAN_FALCON1024_AVX2_prng_init(prng *p, inner_shake256_context *src); + +/* + * Refill the PRNG buffer. This is normally invoked automatically, and + * is declared here only so that prng_get_u64() and prng_get_u8() may be inlined. + */ +void PQCLEAN_FALCON1024_AVX2_prng_refill(prng *p); + +/* + * Get some bytes from a PRNG. + */ +void PQCLEAN_FALCON1024_AVX2_prng_get_bytes(prng *p, void *dst, size_t len); + +/* + * Get a 64-bit random value from a PRNG (the next 8 buffered bytes, assembled in little-endian order). + */ +static inline uint64_t +prng_get_u64(prng *p) { + size_t u; + + /* + * If there are 9 bytes or fewer left in the buffer, we refill it. + * This means that we may drop the last few bytes, but this allows + * for faster extraction code. Also, it means that we never leave + * an empty buffer. + */ + u = p->ptr; + if (u >= (sizeof p->buf.d) - 9) { + PQCLEAN_FALCON1024_AVX2_prng_refill(p); + u = 0; + } + p->ptr = u + 8; + + return (uint64_t)p->buf.d[u + 0] + | ((uint64_t)p->buf.d[u + 1] << 8) + | ((uint64_t)p->buf.d[u + 2] << 16) + | ((uint64_t)p->buf.d[u + 3] << 24) + | ((uint64_t)p->buf.d[u + 4] << 32) + | ((uint64_t)p->buf.d[u + 5] << 40) + | ((uint64_t)p->buf.d[u + 6] << 48) + | ((uint64_t)p->buf.d[u + 7] << 56); +} + +/* + * Get an 8-bit random value from a PRNG; the buffer is refilled as soon as its last byte has been consumed. + */ +static inline unsigned +prng_get_u8(prng *p) { + unsigned v; + + v = p->buf.d[p->ptr ++]; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON1024_AVX2_prng_refill(p); + } + return v; +} + +/* ==================================================================== */ +/* + * FFT (falcon-fft.c). + * + * A real polynomial is represented as an array of N 'fpr' elements. + * The FFT representation of a real polynomial contains N/2 complex + * elements; each is stored as two real numbers, for the real and + * imaginary parts, respectively. See falcon-fft.c for details on the + * internal representation. 
+ */ + +/* + * Compute FFT in-place: the source array should contain a real + * polynomial (N coefficients); its storage area is reused to store + * the FFT representation of that polynomial (N/2 complex numbers). + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON1024_AVX2_FFT(fpr *f, unsigned logn); + +/* + * Compute the inverse FFT in-place: the source array should contain the + * FFT representation of a real polynomial (N/2 elements); the resulting + * real polynomial (N coefficients of type 'fpr') is written over the + * array. + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON1024_AVX2_iFFT(fpr *f, unsigned logn); + +/* + * Add polynomial b to polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON1024_AVX2_poly_add(fpr *a, const fpr *b, unsigned logn); + +/* + * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON1024_AVX2_poly_sub(fpr *a, const fpr *b, unsigned logn); + +/* + * Negate polynomial a. This function works in both normal and FFT + * representations. + */ +void PQCLEAN_FALCON1024_AVX2_poly_neg(fpr *a, unsigned logn); + +/* + * Compute adjoint of polynomial a. This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON1024_AVX2_poly_adj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial a with polynomial b. a and b MUST NOT overlap. + * This function works only in FFT representation. + */ +void PQCLEAN_FALCON1024_AVX2_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT + * overlap. This function works only in FFT representation. + */ +void PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial with its own adjoint. 
This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial with a real constant. This function works in both + * normal and FFT representations. + */ +void PQCLEAN_FALCON1024_AVX2_poly_mulconst(fpr *a, fpr x, unsigned logn); + +/* + * Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON1024_AVX2_poly_div_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) + * (also in FFT representation). Since the result is auto-adjoint, all its + * coordinates in FFT representation are real; as such, only the first N/2 + * values of d[] are filled (the imaginary parts are skipped). + * + * Array d MUST NOT overlap with either a or b. + */ +void PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn); + +/* + * Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) + * (also in FFT representation). Destination d MUST NOT overlap with + * any of the source arrays. + */ +void PQCLEAN_FALCON1024_AVX2_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn); + +/* + * Multiply polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Divide polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. 
+ */ +void PQCLEAN_FALCON1024_AVX2_poly_div_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. On input, g00, g01 and g11 are provided (where the + * matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 + * and d11 values are written in g00, g01 and g11, respectively + * (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). + * (In fact, d00 = g00, so the g00 operand is left unmodified.) + */ +void PQCLEAN_FALCON1024_AVX2_poly_LDL_fft(const fpr *g00, + fpr *g01, fpr *g11, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. This is identical to poly_LDL_fft() except that + * g00, g01 and g11 are unmodified; the outputs d11 and l10 are written + * in two other separate buffers provided as extra parameters. + */ +void PQCLEAN_FALCON1024_AVX2_poly_LDLmv_fft(fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn); + +/* + * Apply "split" operation on a polynomial in FFT representation: + * f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 + * (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. + */ +void PQCLEAN_FALCON1024_AVX2_poly_split_fft(fpr *f0, fpr *f1, + const fpr *f, unsigned logn); + +/* + * Apply "merge" operation on two polynomials in FFT representation: + * given f0 and f1, polynomials modulo X^(N/2)+1, this function computes + * f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. + * f MUST NOT overlap with either f0 or f1. + */ +void PQCLEAN_FALCON1024_AVX2_poly_merge_fft(fpr *f, + const fpr *f0, const fpr *f1, unsigned logn); + +/* ==================================================================== */ +/* + * Key pair generation. + */ + +/* + * Required sizes of the temporary buffer (in bytes). + * + * This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1 + * or 2) where it is slightly greater. 
+ */ +#define FALCON_KEYGEN_TEMP_1 136 +#define FALCON_KEYGEN_TEMP_2 272 +#define FALCON_KEYGEN_TEMP_3 224 +#define FALCON_KEYGEN_TEMP_4 448 +#define FALCON_KEYGEN_TEMP_5 896 +#define FALCON_KEYGEN_TEMP_6 1792 +#define FALCON_KEYGEN_TEMP_7 3584 +#define FALCON_KEYGEN_TEMP_8 7168 +#define FALCON_KEYGEN_TEMP_9 14336 +#define FALCON_KEYGEN_TEMP_10 28672 + +/* + * Generate a new key pair. Randomness is extracted from the provided + * SHAKE256 context, which must have already been seeded and flipped. + * The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* + * macros) and be aligned for the uint32_t, uint64_t and fpr types. + * + * The private key elements are written in f, g, F and G, and the + * public key is written in h. Either or both of G and h may be NULL, + * in which case the corresponding element is not returned (they can + * be recomputed from f, g and F). + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_AVX2_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Signature generation. + */ + +/* + * Expand a private key into the B0 matrix in FFT representation and + * the LDL tree. All the values are written in 'expanded_key', for + * a total of (8*logn+40)*2^logn bytes. + * + * The tmp[] array must have room for at least 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_AVX2_expand_privkey(fpr *expanded_key, + const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, + unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. 
This function uses an + * expanded key (as generated by PQCLEAN_FALCON1024_AVX2_expand_privkey()). + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_AVX2_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. This function uses a raw + * key and dynamically recompute the B0 matrix and LDL tree; this + * saves RAM since there is no needed for an expanded key, but + * increases the signature cost. + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 72*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_AVX2_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Internal sampler engine. Exported for tests. + * + * sampler_context wraps around a source of random numbers (PRNG) and + * the sigma_min value (nominally dependent on the degree). + * + * sampler() takes as parameters: + * ctx pointer to the sampler_context structure + * mu center for the distribution + * isigma inverse of the distribution standard deviation + * It returns an integer sampled along the Gaussian distribution centered + * on mu and of standard deviation sigma = 1/isigma. 
+ * + * gaussian0_sampler() takes as parameter a pointer to a PRNG, and + * returns an integer sampled along a half-Gaussian with standard + * deviation sigma0 = 1.8205 (center is 0, returned value is + * nonnegative). + */ + +typedef struct { + prng p; /* source of random numbers */ + fpr sigma_min; /* sigma_min value (nominally dependent on the degree) */ +} sampler_context; + +int PQCLEAN_FALCON1024_AVX2_sampler(void *ctx, fpr mu, fpr isigma); + +int PQCLEAN_FALCON1024_AVX2_gaussian0_sampler(prng *p); + +/* ==================================================================== */ + +#endif diff --git a/crypto_sign/falcon/falcon-1024/avx2/keygen.c b/crypto_sign/falcon/falcon-1024/avx2/keygen.c new file mode 100644 index 00000000..53a3682d --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/keygen.c @@ -0,0 +1,4231 @@ +#include "inner.h" + +/* + * Falcon key pair generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +#define MKN(logn) ((size_t)1 << (logn)) + +/* ==================================================================== */ +/* + * Modular arithmetics. + * + * We implement a few functions for computing modulo a small integer p. + * + * All functions require that 2^30 < p < 2^31. Moreover, operands must + * be in the 0..p-1 range. + * + * Modular addition and subtraction work for all such p. + * + * Montgomery multiplication requires that p is odd, and must be provided + * with an additional value p0i = -1/p mod 2^31. See below for some basics + * on Montgomery multiplication. + * + * Division computes an inverse modulo p by an exponentiation (with + * exponent p-2): this works only if p is prime. Multiplication + * requirements also apply, i.e. p must be odd and p0i must be provided. + * + * The NTT and inverse NTT need all of the above, and also that + * p = 1 mod 2048. + * + * ----------------------------------------------------------------------- + * + * We use Montgomery representation with 31-bit values: + * + * Let R = 2^31 mod p. When 2^30 < p < 2^31, R = 2^31 - p. + * Montgomery representation of an integer x modulo p is x*R mod p. + * + * Montgomery multiplication computes (x*y)/R mod p for + * operands x and y. 
Therefore: + * + * - if operands are x*R and y*R (Montgomery representations of x and + * y), then Montgomery multiplication computes (x*R*y*R)/R = (x*y)*R + * mod p, which is the Montgomery representation of the product x*y; + * + * - if operands are x*R and y (or x and y*R), then Montgomery + * multiplication returns x*y mod p: mixed-representation + * multiplications yield results in normal representation. + * + * To convert to Montgomery representation, we multiply by R, which is done + * by Montgomery-multiplying by R^2. Stand-alone conversion back from + * Montgomery representation is Montgomery-multiplication by 1. + */ + +/* + * Precomputed small primes. Each element contains the following: + * + * p The prime itself. + * + * g A primitive root of phi = X^N+1 (in field Z_p). + * + * s The inverse of the product of all previous primes in the array, + * computed modulo p and in Montgomery representation. + * + * All primes are such that p = 1 mod 2048, and are lower than 2^31. They + * are listed in decreasing order. 
/*
 * Descriptor for one precomputed small prime (fields p, g and s as
 * described above). The PRIMES[] table is terminated by an all-zero
 * entry.
 */
typedef struct {
    uint32_t p;   /* the prime modulus */
    uint32_t g;   /* root of X^N+1 modulo p, in normal representation */
    uint32_t s;   /* 1/(product of all previous primes) mod p, Montgomery */
} small_prime;

static const small_prime PRIMES[] = {
    { 2147473409, 383167813, 10239 }, { 2147389441, 211808905, 471403745 },
    { 2147387393, 37672282, 1329335065 }, { 2147377153, 1977035326, 968223422 },
    { 2147358721, 1067163706, 132460015 }, { 2147352577, 1606082042, 598693809 },
    { 2147346433, 2033915641, 1056257184 }, { 2147338241, 1653770625, 421286710 },
    { 2147309569, 631200819, 1111201074 }, { 2147297281, 2038364663, 1042003613 },
    { 2147295233, 1962540515, 19440033 }, { 2147239937, 2100082663, 353296760 },
    { 2147235841, 1991153006, 1703918027 }, { 2147217409, 516405114, 1258919613 },
    { 2147205121, 409347988, 1089726929 }, { 2147196929, 927788991, 1946238668 },
    { 2147178497, 1136922411, 1347028164 }, { 2147100673, 868626236, 701164723 },
    { 2147082241, 1897279176, 617820870 }, { 2147074049, 1888819123, 158382189 },
    { 2147051521, 25006327, 522758543 }, { 2147043329, 327546255, 37227845 },
    { 2147039233, 766324424, 1133356428 }, { 2146988033, 1862817362, 73861329 },
    { 2146963457, 404622040, 653019435 }, { 2146959361, 1936581214, 995143093 },
    { 2146938881, 1559770096, 634921513 }, { 2146908161, 422623708, 1985060172 },
    { 2146885633, 1751189170, 298238186 }, { 2146871297, 578919515, 291810829 },
    { 2146846721, 1114060353, 915902322 }, { 2146834433, 2069565474, 47859524 },
    { 2146818049, 1552824584, 646281055 }, { 2146775041, 1906267847, 1597832891 },
    { 2146756609, 1847414714, 1228090888 }, { 2146744321, 1818792070, 1176377637 },
    { 2146738177, 1118066398, 1054971214 }, { 2146736129, 52057278, 933422153 },
    { 2146713601, 592259376, 1406621510 }, { 2146695169, 263161877, 1514178701 },
    { 2146656257, 685363115, 384505091 }, { 2146650113, 927727032, 537575289 },
    { 2146646017, 52575506, 1799464037 }, { 2146643969, 1276803876, 1348954416 },
    { 2146603009, 814028633, 1521547704 }, { 2146572289, 1846678872, 1310832121 },
    { 2146547713, 919368090, 1019041349 }, { 2146508801, 671847612, 38582496 },
    { 2146492417, 283911680, 532424562 }, { 2146490369, 1780044827, 896447978 },
    { 2146459649, 327980850, 1327906900 }, { 2146447361, 1310561493, 958645253 },
    { 2146441217, 412148926, 287271128 }, { 2146437121, 293186449, 2009822534 },
    { 2146430977, 179034356, 1359155584 }, { 2146418689, 1517345488, 1790248672 },
    { 2146406401, 1615820390, 1584833571 }, { 2146404353, 826651445, 607120498 },
    { 2146379777, 3816988, 1897049071 }, { 2146363393, 1221409784, 1986921567 },
    { 2146355201, 1388081168, 849968120 }, { 2146336769, 1803473237, 1655544036 },
    { 2146312193, 1023484977, 273671831 }, { 2146293761, 1074591448, 467406983 },
    { 2146283521, 831604668, 1523950494 }, { 2146203649, 712865423, 1170834574 },
    { 2146154497, 1764991362, 1064856763 }, { 2146142209, 627386213, 1406840151 },
    { 2146127873, 1638674429, 2088393537 }, { 2146099201, 1516001018, 690673370 },
    { 2146093057, 1294931393, 315136610 }, { 2146091009, 1942399533, 973539425 },
    { 2146078721, 1843461814, 2132275436 }, { 2146060289, 1098740778, 360423481 },
    { 2146048001, 1617213232, 1951981294 }, { 2146041857, 1805783169, 2075683489 },
    { 2146019329, 272027909, 1753219918 }, { 2145986561, 1206530344, 2034028118 },
    { 2145976321, 1243769360, 1173377644 }, { 2145964033, 887200839, 1281344586 },
    { 2145906689, 1651026455, 906178216 }, { 2145875969, 1673238256, 1043521212 },
    { 2145871873, 1226591210, 1399796492 }, { 2145841153, 1465353397, 1324527802 },
    { 2145832961, 1150638905, 554084759 }, { 2145816577, 221601706, 427340863 },
    { 2145785857, 608896761, 316590738 }, { 2145755137, 1712054942, 1684294304 },
    { 2145742849, 1302302867, 724873116 }, { 2145728513, 516717693, 431671476 },
    { 2145699841, 524575579, 1619722537 }, { 2145691649, 1925625239, 982974435 },
    { 2145687553, 463795662, 1293154300 }, { 2145673217, 771716636, 881778029 },
    { 2145630209, 1509556977, 837364988 }, { 2145595393, 229091856, 851648427 },
    { 2145587201, 1796903241, 635342424 }, { 2145525761, 715310882, 1677228081 },
    { 2145495041, 1040930522, 200685896 }, { 2145466369, 949804237, 1809146322 },
    { 2145445889, 1673903706, 95316881 }, { 2145390593, 806941852, 1428671135 },
    { 2145372161, 1402525292, 159350694 }, { 2145361921, 2124760298, 1589134749 },
    { 2145359873, 1217503067, 1561543010 }, { 2145355777, 338341402, 83865711 },
    { 2145343489, 1381532164, 641430002 }, { 2145325057, 1883895478, 1528469895 },
    { 2145318913, 1335370424, 65809740 }, { 2145312769, 2000008042, 1919775760 },
    { 2145300481, 961450962, 1229540578 }, { 2145282049, 910466767, 1964062701 },
    { 2145232897, 816527501, 450152063 }, { 2145218561, 1435128058, 1794509700 },
    { 2145187841, 33505311, 1272467582 }, { 2145181697, 269767433, 1380363849 },
    { 2145175553, 56386299, 1316870546 }, { 2145079297, 2106880293, 1391797340 },
    { 2145021953, 1347906152, 720510798 }, { 2145015809, 206769262, 1651459955 },
    { 2145003521, 1885513236, 1393381284 }, { 2144960513, 1810381315, 31937275 },
    { 2144944129, 1306487838, 2019419520 }, { 2144935937, 37304730, 1841489054 },
    { 2144894977, 1601434616, 157985831 }, { 2144888833, 98749330, 2128592228 },
    { 2144880641, 1772327002, 2076128344 }, { 2144864257, 1404514762, 2029969964 },
    { 2144827393, 801236594, 406627220 }, { 2144806913, 349217443, 1501080290 },
    { 2144796673, 1542656776, 2084736519 }, { 2144778241, 1210734884, 1746416203 },
    { 2144759809, 1146598851, 716464489 }, { 2144757761, 286328400, 1823728177 },
    { 2144729089, 1347555695, 1836644881 }, { 2144727041, 1795703790, 520296412 },
    { 2144696321, 1302475157, 852964281 }, { 2144667649, 1075877614, 504992927 },
    { 2144573441, 198765808, 1617144982 }, { 2144555009, 321528767, 155821259 },
    { 2144550913, 814139516, 1819937644 }, { 2144536577, 571143206, 962942255 },
    { 2144524289, 1746733766, 2471321 }, { 2144512001, 1821415077, 124190939 },
    { 2144468993, 917871546, 1260072806 }, { 2144458753, 378417981, 1569240563 },
    { 2144421889, 175229668, 1825620763 }, { 2144409601, 1699216963, 351648117 },
    { 2144370689, 1071885991, 958186029 }, { 2144348161, 1763151227, 540353574 },
    { 2144335873, 1060214804, 919598847 }, { 2144329729, 663515846, 1448552668 },
    { 2144327681, 1057776305, 590222840 }, { 2144309249, 1705149168, 1459294624 },
    { 2144296961, 325823721, 1649016934 }, { 2144290817, 738775789, 447427206 },
    { 2144243713, 962347618, 893050215 }, { 2144237569, 1655257077, 900860862 },
    { 2144161793, 242206694, 1567868672 }, { 2144155649, 769415308, 1247993134 },
    { 2144137217, 320492023, 515841070 }, { 2144120833, 1639388522, 770877302 },
    { 2144071681, 1761785233, 964296120 }, { 2144065537, 419817825, 204564472 },
    { 2144028673, 666050597, 2091019760 }, { 2144010241, 1413657615, 1518702610 },
    { 2143952897, 1238327946, 475672271 }, { 2143940609, 307063413, 1176750846 },
    { 2143918081, 2062905559, 786785803 }, { 2143899649, 1338112849, 1562292083 },
    { 2143891457, 68149545, 87166451 }, { 2143885313, 921750778, 394460854 },
    { 2143854593, 719766593, 133877196 }, { 2143836161, 1149399850, 1861591875 },
    { 2143762433, 1848739366, 1335934145 }, { 2143756289, 1326674710, 102999236 },
    { 2143713281, 808061791, 1156900308 }, { 2143690753, 388399459, 1926468019 },
    { 2143670273, 1427891374, 1756689401 }, { 2143666177, 1912173949, 986629565 },
    { 2143645697, 2041160111, 371842865 }, { 2143641601, 1279906897, 2023974350 },
    { 2143635457, 720473174, 1389027526 }, { 2143621121, 1298309455, 1732632006 },
    { 2143598593, 1548762216, 1825417506 }, { 2143567873, 620475784, 1073787233 },
    { 2143561729, 1932954575, 949167309 }, { 2143553537, 354315656, 1652037534 },
    { 2143541249, 577424288, 1097027618 }, { 2143531009, 357862822, 478640055 },
    { 2143522817, 2017706025, 1550531668 }, { 2143506433, 2078127419, 1824320165 },
    { 2143488001, 613475285, 1604011510 }, { 2143469569, 1466594987, 502095196 },
    { 2143426561, 1115430331, 1044637111 }, { 2143383553, 9778045, 1902463734 },
    { 2143377409, 1557401276, 2056861771 }, { 2143363073, 652036455, 1965915971 },
    { 2143260673, 1464581171, 1523257541 }, { 2143246337, 1876119649, 764541916 },
    { 2143209473, 1614992673, 1920672844 }, { 2143203329, 981052047, 2049774209 },
    { 2143160321, 1847355533, 728535665 }, { 2143129601, 965558457, 603052992 },
    { 2143123457, 2140817191, 8348679 }, { 2143100929, 1547263683, 694209023 },
    { 2143092737, 643459066, 1979934533 }, { 2143082497, 188603778, 2026175670 },
    { 2143062017, 1657329695, 377451099 }, { 2143051777, 114967950, 979255473 },
    { 2143025153, 1698431342, 1449196896 }, { 2143006721, 1862741675, 1739650365 },
    { 2142996481, 756660457, 996160050 }, { 2142976001, 927864010, 1166847574 },
    { 2142965761, 905070557, 661974566 }, { 2142916609, 40932754, 1787161127 },
    { 2142892033, 1987985648, 675335382 }, { 2142885889, 797497211, 1323096997 },
    { 2142871553, 2068025830, 1411877159 }, { 2142861313, 1217177090, 1438410687 },
    { 2142830593, 409906375, 1767860634 }, { 2142803969, 1197788993, 359782919 },
    { 2142785537, 643817365, 513932862 }, { 2142779393, 1717046338, 218943121 },
    { 2142724097, 89336830, 416687049 }, { 2142707713, 5944581, 1356813523 },
    { 2142658561, 887942135, 2074011722 }, { 2142638081, 151851972, 1647339939 },
    { 2142564353, 1691505537, 1483107336 }, { 2142533633, 1989920200, 1135938817 },
    { 2142529537, 959263126, 1531961857 }, { 2142527489, 453251129, 1725566162 },
    { 2142502913, 1536028102, 182053257 }, { 2142498817, 570138730, 701443447 },
    { 2142416897, 326965800, 411931819 }, { 2142363649, 1675665410, 1517191733 },
    { 2142351361, 968529566, 1575712703 }, { 2142330881, 1384953238, 1769087884 },
    { 2142314497, 1977173242, 1833745524 }, { 2142289921, 95082313, 1714775493 },
    { 2142283777, 109377615, 1070584533 }, { 2142277633, 16960510, 702157145 },
    { 2142263297, 553850819, 431364395 }, { 2142208001, 241466367, 2053967982 },
    { 2142164993, 1795661326, 1031836848 }, { 2142097409, 1212530046, 712772031 },
    { 2142087169, 1763869720, 822276067 }, { 2142078977, 644065713, 1765268066 },
    { 2142074881, 112671944, 643204925 }, { 2142044161, 1387785471, 1297890174 },
    { 2142025729, 783885537, 1000425730 }, { 2142011393, 905662232, 1679401033 },
    { 2141974529, 799788433, 468119557 }, { 2141943809, 1932544124, 449305555 },
    { 2141933569, 1527403256, 841867925 }, { 2141931521, 1247076451, 743823916 },
    { 2141902849, 1199660531, 401687910 }, { 2141890561, 150132350, 1720336972 },
    { 2141857793, 1287438162, 663880489 }, { 2141833217, 618017731, 1819208266 },
    { 2141820929, 999578638, 1403090096 }, { 2141786113, 81834325, 1523542501 },
    { 2141771777, 120001928, 463556492 }, { 2141759489, 122455485, 2124928282 },
    { 2141749249, 141986041, 940339153 }, { 2141685761, 889088734, 477141499 },
    { 2141673473, 324212681, 1122558298 }, { 2141669377, 1175806187, 1373818177 },
    { 2141655041, 1113654822, 296887082 }, { 2141587457, 991103258, 1585913875 },
    { 2141583361, 1401451409, 1802457360 }, { 2141575169, 1571977166, 712760980 },
    { 2141546497, 1107849376, 1250270109 }, { 2141515777, 196544219, 356001130 },
    { 2141495297, 1733571506, 1060744866 }, { 2141483009, 321552363, 1168297026 },
    { 2141458433, 505818251, 733225819 }, { 2141360129, 1026840098, 948342276 },
    { 2141325313, 945133744, 2129965998 }, { 2141317121, 1871100260, 1843844634 },
    { 2141286401, 1790639498, 1750465696 }, { 2141267969, 1376858592, 186160720 },
    { 2141255681, 2129698296, 1876677959 }, { 2141243393, 2138900688, 1340009628 },
    { 2141214721, 1933049835, 1087819477 }, { 2141212673, 1898664939, 1786328049 },
    { 2141202433, 990234828, 940682169 }, { 2141175809, 1406392421, 993089586 },
    { 2141165569, 1263518371, 289019479 }, { 2141073409, 1485624211, 507864514 },
    { 2141052929, 1885134788, 311252465 }, { 2141040641, 1285021247, 280941862 },
    { 2141028353, 1527610374, 375035110 }, { 2141011969, 1400626168, 164696620 },
    { 2140999681, 632959608, 966175067 }, { 2140997633, 2045628978, 1290889438 },
    { 2140993537, 1412755491, 375366253 }, { 2140942337, 719477232, 785367828 },
    { 2140925953, 45224252, 836552317 }, { 2140917761, 1157376588, 1001839569 },
    { 2140887041, 278480752, 2098732796 }, { 2140837889, 1663139953, 924094810 },
    { 2140788737, 802501511, 2045368990 }, { 2140766209, 1820083885, 1800295504 },
    { 2140764161, 1169561905, 2106792035 }, { 2140696577, 127781498, 1885987531 },
    { 2140684289, 16014477, 1098116827 }, { 2140653569, 665960598, 1796728247 },
    { 2140594177, 1043085491, 377310938 }, { 2140579841, 1732838211, 1504505945 },
    { 2140569601, 302071939, 358291016 }, { 2140567553, 192393733, 1909137143 },
    { 2140557313, 406595731, 1175330270 }, { 2140549121, 1748850918, 525007007 },
    { 2140477441, 499436566, 1031159814 }, { 2140469249, 1886004401, 1029951320 },
    { 2140426241, 1483168100, 1676273461 }, { 2140420097, 1779917297, 846024476 },
    { 2140413953, 522948893, 1816354149 }, { 2140383233, 1931364473, 1296921241 },
    { 2140366849, 1917356555, 147196204 }, { 2140354561, 16466177, 1349052107 },
    { 2140348417, 1875366972, 1860485634 }, { 2140323841, 456498717, 1790256483 },
    { 2140321793, 1629493973, 150031888 }, { 2140315649, 1904063898, 395510935 },
    { 2140280833, 1784104328, 831417909 }, { 2140250113, 256087139, 697349101 },
    { 2140229633, 388553070, 243875754 }, { 2140223489, 747459608, 1396270850 },
    { 2140200961, 507423743, 1895572209 }, { 2140162049, 580106016, 2045297469 },
    { 2140149761, 712426444, 785217995 }, { 2140137473, 1441607584, 536866543 },
    { 2140119041, 346538902, 1740434653 }, { 2140090369, 282642885, 21051094 },
    { 2140076033, 1407456228, 319910029 }, { 2140047361, 1619330500, 1488632070 },
    { 2140041217, 2089408064, 2012026134 }, { 2140008449, 1705524800, 1613440760 },
    { 2139924481, 1846208233, 1280649481 }, { 2139906049, 989438755, 1185646076 },
    { 2139867137, 1522314850, 372783595 }, { 2139842561, 1681587377, 216848235 },
    { 2139826177, 2066284988, 1784999464 }, { 2139824129, 480888214, 1513323027 },
    { 2139789313, 847937200, 858192859 }, { 2139783169, 1642000434, 1583261448 },
    { 2139770881, 940699589, 179702100 }, { 2139768833, 315623242, 964612676 },
    { 2139666433, 331649203, 764666914 }, { 2139641857, 2118730799, 1313764644 },
    { 2139635713, 519149027, 519212449 }, { 2139598849, 1526413634, 1769667104 },
    { 2139574273, 551148610, 820739925 }, { 2139568129, 1386800242, 472447405 },
    { 2139549697, 813760130, 1412328531 }, { 2139537409, 1615286260, 1609362979 },
    { 2139475969, 1352559299, 1696720421 }, { 2139455489, 1048691649, 1584935400 },
    { 2139432961, 836025845, 950121150 }, { 2139424769, 1558281165, 1635486858 },
    { 2139406337, 1728402143, 1674423301 }, { 2139396097, 1727715782, 1483470544 },
    { 2139383809, 1092853491, 1741699084 }, { 2139369473, 690776899, 1242798709 },
    { 2139351041, 1768782380, 2120712049 }, { 2139334657, 1739968247, 1427249225 },
    { 2139332609, 1547189119, 623011170 }, { 2139310081, 1346827917, 1605466350 },
    { 2139303937, 369317948, 828392831 }, { 2139301889, 1560417239, 1788073219 },
    { 2139283457, 1303121623, 595079358 }, { 2139248641, 1354555286, 573424177 },
    { 2139240449, 60974056, 885781403 }, { 2139222017, 355573421, 1221054839 },
    { 2139215873, 566477826, 1724006500 }, { 2139150337, 871437673, 1609133294 },
    { 2139144193, 1478130914, 1137491905 }, { 2139117569, 1854880922, 964728507 },
    { 2139076609, 202405335, 756508944 }, { 2139062273, 1399715741, 884826059 },
    { 2139045889, 1051045798, 1202295476 }, { 2139033601, 1707715206, 632234634 },
    { 2139006977, 2035853139, 231626690 }, { 2138951681, 183867876, 838350879 },
    { 2138945537, 1403254661, 404460202 }, { 2138920961, 310865011, 1282911681 },
    { 2138910721, 1328496553, 103472415 }, { 2138904577, 78831681, 993513549 },
    { 2138902529, 1319697451, 1055904361 }, { 2138816513, 384338872, 1706202469 },
    { 2138810369, 1084868275, 405677177 }, { 2138787841, 401181788, 1964773901 },
    { 2138775553, 1850532988, 1247087473 }, { 2138767361, 874261901, 1576073565 },
    { 2138757121, 1187474742, 993541415 }, { 2138748929, 1782458888, 1043206483 },
    { 2138744833, 1221500487, 800141243 }, { 2138738689, 413465368, 1450660558 },
    { 2138695681, 739045140, 342611472 }, { 2138658817, 1355845756, 672674190 },
    { 2138644481, 608379162, 1538874380 }, { 2138632193, 1444914034, 686911254 },
    { 2138607617, 484707818, 1435142134 }, { 2138591233, 539460669, 1290458549 },
    { 2138572801, 2093538990, 2011138646 }, { 2138552321, 1149786988, 1076414907 },
    { 2138546177, 840688206, 2108985273 }, { 2138533889, 209669619, 198172413 },
    { 2138523649, 1975879426, 1277003968 }, { 2138490881, 1351891144, 1976858109 },
    { 2138460161, 1817321013, 1979278293 }, { 2138429441, 1950077177, 203441928 },
    { 2138400769, 908970113, 628395069 }, { 2138398721, 219890864, 758486760 },
    { 2138376193, 1306654379, 977554090 }, { 2138351617, 298822498, 2004708503 },
    { 2138337281, 441457816, 1049002108 }, { 2138320897, 1517731724, 1442269609 },
    { 2138290177, 1355911197, 1647139103 }, { 2138234881, 531313247, 1746591962 },
    { 2138214401, 1899410930, 781416444 }, { 2138202113, 1813477173, 1622508515 },
    { 2138191873, 1086458299, 1025408615 }, { 2138183681, 1998800427, 827063290 },
    { 2138173441, 1921308898, 749670117 }, { 2138103809, 1620902804, 2126787647 },
    { 2138099713, 828647069, 1892961817 }, { 2138085377, 179405355, 1525506535 },
    { 2138060801, 615683235, 1259580138 }, { 2138044417, 2030277840, 1731266562 },
    { 2138042369, 2087222316, 1627902259 }, { 2138032129, 126388712, 1108640984 },
    { 2138011649, 715026550, 1017980050 }, { 2137993217, 1693714349, 1351778704 },
    { 2137888769, 1289762259, 1053090405 }, { 2137853953, 199991890, 1254192789 },
    { 2137833473, 941421685, 896995556 }, { 2137817089, 750416446, 1251031181 },
    { 2137792513, 798075119, 368077456 }, { 2137786369, 878543495, 1035375025 },
    { 2137767937, 9351178, 1156563902 }, { 2137755649, 1382297614, 1686559583 },
    { 2137724929, 1345472850, 1681096331 }, { 2137704449, 834666929, 630551727 },
    { 2137673729, 1646165729, 1892091571 }, { 2137620481, 778943821, 48456461 },
    { 2137618433, 1730837875, 1713336725 }, { 2137581569, 805610339, 1378891359 },
    { 2137538561, 204342388, 1950165220 }, { 2137526273, 1947629754, 1500789441 },
    { 2137516033, 719902645, 1499525372 }, { 2137491457, 230451261, 556382829 },
    { 2137440257, 979573541, 412760291 }, { 2137374721, 927841248, 1954137185 },
    { 2137362433, 1243778559, 861024672 }, { 2137313281, 1341338501, 980638386 },
    { 2137311233, 937415182, 1793212117 }, { 2137255937, 795331324, 1410253405 },
    { 2137243649, 150756339, 1966999887 }, { 2137182209, 163346914, 1939301431 },
    { 2137171969, 1952552395, 758913141 }, { 2137159681, 570788721, 218668666 },
    { 2137147393, 1896656810, 2045670345 }, { 2137141249, 358493842, 518199643 },
    { 2137139201, 1505023029, 674695848 }, { 2137133057, 27911103, 830956306 },
    { 2137122817, 439771337, 1555268614 }, { 2137116673, 790988579, 1871449599 },
    { 2137110529, 432109234, 811805080 }, { 2137102337, 1357900653, 1184997641 },
    { 2137098241, 515119035, 1715693095 }, { 2137090049, 408575203, 2085660657 },
    { 2137085953, 2097793407, 1349626963 }, { 2137055233, 1556739954, 1449960883 },
    { 2137030657, 1545758650, 1369303716 }, { 2136987649, 332602570, 103875114 },
    { 2136969217, 1499989506, 1662964115 }, { 2136924161, 857040753, 4738842 },
    { 2136895489, 1948872712, 570436091 }, { 2136893441, 58969960, 1568349634 },
    { 2136887297, 2127193379, 273612548 }, { 2136850433, 111208983, 1181257116 },
    { 2136809473, 1627275942, 1680317971 }, { 2136764417, 1574888217, 14011331 },
    { 2136741889, 14011055, 1129154251 }, { 2136727553, 35862563, 1838555253 },
    { 2136721409, 310235666, 1363928244 }, { 2136698881, 1612429202, 1560383828 },
    { 2136649729, 1138540131, 800014364 }, { 2136606721, 602323503, 1433096652 },
    { 2136563713, 182209265, 1919611038 }, { 2136555521, 324156477, 165591039 },
    { 2136549377, 195513113, 217165345 }, { 2136526849, 1050768046, 939647887 },
    { 2136508417, 1886286237, 1619926572 }, { 2136477697, 609647664, 35065157 },
    { 2136471553, 679352216, 1452259468 }, { 2136457217, 128630031, 824816521 },
    { 2136422401, 19787464, 1526049830 }, { 2136420353, 698316836, 1530623527 },
    { 2136371201, 1651862373, 1804812805 }, { 2136334337, 326596005, 336977082 },
    { 2136322049, 63253370, 1904972151 }, { 2136297473, 312176076, 172182411 },
    { 2136248321, 381261841, 369032670 }, { 2136242177, 358688773, 1640007994 },
    { 2136229889, 512677188, 75585225 }, { 2136219649, 2095003250, 1970086149 },
    { 2136207361, 1909650722, 537760675 }, { 2136176641, 1334616195, 1533487619 },
    { 2136158209, 2096285632, 1793285210 }, { 2136143873, 1897347517, 293843959 },
    { 2136133633, 923586222, 1022655978 }, { 2136096769, 1464868191, 1515074410 },
    { 2136094721, 2020679520, 2061636104 }, { 2136076289, 290798503, 1814726809 },
    { 2136041473, 156415894, 1250757633 }, { 2135996417, 297459940, 1132158924 },
    { 2135955457, 538755304, 1688831340 },
    { 0, 0, 0 }
};

/*
 * Map a small signed integer to its representative in 0..p-1. The
 * input MUST satisfy -p < x < p.
 */
static inline uint32_t
modp_set(int32_t x, uint32_t p) {
    uint32_t v, neg_mask;

    v = (uint32_t)x;
    /* All-ones when the sign bit of x is set, zero otherwise. */
    neg_mask = -(v >> 31);
    return v + (p & neg_mask);
}

/*
 * Map a value modulo p to the signed representative closest to zero
 * (x is expected in the 0..p-1 range).
 */
static inline int32_t
modp_norm(uint32_t x, uint32_t p) {
    uint32_t half, is_low, mask;

    half = (p + 1) >> 1;
    /* Top bit of the wrapped difference: 1 when x < (p+1)/2. */
    is_low = (x - half) >> 31;
    /* Zero for the low half, all-ones for the high half. */
    mask = is_low - 1;
    return (int32_t)(x - (p & mask));
}

/*
 * Compute -1/p mod 2^31.
 */
/*
 * modp_ninv31(): return -1/p mod 2^31. This works for all odd
 * integers p that fit on 31 bits.
 *
 * 2 - p is an inverse of p modulo 4; each subsequent step doubles the
 * number of valid low bits, so four steps yield an inverse modulo
 * 2^32, which is then negated and truncated to 31 bits.
 */
static uint32_t
modp_ninv31(uint32_t p) {
    uint32_t y;

    y = 2 - p;
    y *= 2 - p * y;
    y *= 2 - p * y;
    y *= 2 - p * y;
    y *= 2 - p * y;
    return (uint32_t)0x7FFFFFFF & -y;
}

/*
 * Compute R = 2^31 mod p.
 */
static inline uint32_t
modp_R(uint32_t p) {
    /*
     * Since 2^30 < p < 2^31, we know that 2^31 mod p is simply
     * 2^31 - p.
     */
    return ((uint32_t)1 << 31) - p;
}

/*
 * Addition modulo p. Operands must be in the 0..p-1 range.
 */
static inline uint32_t
modp_add(uint32_t a, uint32_t b, uint32_t p) {
    uint32_t d;

    d = a + b - p;
    /* If a + b < p the subtraction wrapped (top bit set): add p back. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Subtraction modulo p. Operands must be in the 0..p-1 range.
 */
static inline uint32_t
modp_sub(uint32_t a, uint32_t b, uint32_t p) {
    uint32_t d;

    d = a - b;
    /* If a < b the subtraction wrapped (top bit set): add p back. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Halving modulo p.
 */
/* unused
static inline uint32_t
modp_half(uint32_t a, uint32_t p)
{
    a += p & -(a & 1);
    return a >> 1;
}
*/

/*
 * Montgomery multiplication modulo p: returns (a*b)/2^31 mod p. The
 * 'p0i' value is -1/p mod 2^31. It is required that p is an odd
 * integer.
 */
static inline uint32_t
modp_montymul(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i) {
    uint64_t z, w;
    uint32_t d;

    z = (uint64_t)a * (uint64_t)b;
    /* w is the multiple of p that makes z + w divisible by 2^31. */
    w = ((z * p0i) & (uint64_t)0x7FFFFFFF) * p;
    d = (uint32_t)((z + w) >> 31) - p;
    /* Final conditional correction into the 0..p-1 range. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Compute R2 = 2^62 mod p.
 */
static uint32_t
modp_R2(uint32_t p, uint32_t p0i) {
    uint32_t z;

    /*
     * Compute z = 2^31 mod p (this is the value 1 in Montgomery
     * representation), then double it with an addition.
     */
    z = modp_R(p);
    z = modp_add(z, z, p);

    /*
     * Square it five times to obtain 2^32 in Montgomery representation
     * (i.e. 2^63 mod p).
     */
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);

    /*
     * Halve the value mod p to get 2^62.
     */
    z = (z + (p & -(z & 1))) >> 1;
    return z;
}

/*
 * Compute 2^(31*x) modulo p. This works for integers x up to 2^11.
 * p must be prime such that 2^30 < p < 2^31; p0i must be equal to
 * -1/p mod 2^31; R2 must be equal to 2^62 mod p.
 *
 * For x = 0 the result is 1.
 */
static inline uint32_t
modp_Rx(unsigned x, uint32_t p, uint32_t p0i, uint32_t R2) {
    int i;
    uint32_t r, z;

    /*
     * x = 0 must be handled apart: the decrement below would wrap
     * around and the loop would then shift by the full width of
     * 'unsigned', which is undefined behaviour.
     */
    if (x == 0) {
        return 1;
    }

    /*
     * 2^(31*x) = (2^31)*(2^(31*(x-1))); i.e. we want the Montgomery
     * representation of (2^31)^e mod p, where e = x-1.
     * R2 is 2^31 in Montgomery representation.
     */
    x --;
    r = R2;
    z = modp_R(p);
    for (i = 0; (1U << i) <= x; i ++) {
        if ((x & (1U << i)) != 0) {
            z = modp_montymul(z, r, p, p0i);
        }
        r = modp_montymul(r, r, p, p0i);
    }
    return z;
}

/*
 * Division modulo p. If the divisor (b) is 0, then 0 is returned.
 * This function computes proper results only when p is prime.
 * Parameters:
 *   a     dividend
 *   b     divisor
 *   p     odd prime modulus
 *   p0i   -1/p mod 2^31
 *   R     2^31 mod p
 */
static uint32_t
modp_div(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i, uint32_t R) {
    uint32_t z, e;
    int i;

    /* Square-and-multiply with exponent p-2; the select is branchless. */
    e = p - 2;
    z = R;
    for (i = 30; i >= 0; i --) {
        uint32_t z2;

        z = modp_montymul(z, z, p, p0i);
        z2 = modp_montymul(z, b, p, p0i);
        z ^= (z ^ z2) & -(uint32_t)((e >> i) & 1);
    }

    /*
     * The loop above just assumed that b was in Montgomery
     * representation, i.e. really contained b*R; under that
     * assumption, it returns 1/b in Montgomery representation,
     * which is R/b. But we gave it b in normal representation,
     * so the loop really returned R/(b/R) = R^2/b.
     *
     * We want a/b, so we need one Montgomery multiplication with a,
     * which also removes one of the R factors, and another such
     * multiplication to remove the second R factor.
     */
    z = modp_montymul(z, 1, p, p0i);
    return modp_montymul(a, z, p, p0i);
}

/*
 * Bit-reversal index table.
 */
/*
 * REV10[i] is the value of i with its 10 low bits reversed. With 16
 * entries per row, row r holds rev4(column) * 64 + rev6(r).
 */
static const uint16_t REV10[] = {
    0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, 192, 704, 448, 960,
    32, 544, 288, 800, 160, 672, 416, 928, 96, 608, 352, 864, 224, 736, 480, 992,
    16, 528, 272, 784, 144, 656, 400, 912, 80, 592, 336, 848, 208, 720, 464, 976,
    48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880, 240, 752, 496, 1008,
    8, 520, 264, 776, 136, 648, 392, 904, 72, 584, 328, 840, 200, 712, 456, 968,
    40, 552, 296, 808, 168, 680, 424, 936, 104, 616, 360, 872, 232, 744, 488, 1000,
    24, 536, 280, 792, 152, 664, 408, 920, 88, 600, 344, 856, 216, 728, 472, 984,
    56, 568, 312, 824, 184, 696, 440, 952, 120, 632, 376, 888, 248, 760, 504, 1016,
    4, 516, 260, 772, 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964,
    36, 548, 292, 804, 164, 676, 420, 932, 100, 612, 356, 868, 228, 740, 484, 996,
    20, 532, 276, 788, 148, 660, 404, 916, 84, 596, 340, 852, 212, 724, 468, 980,
    52, 564, 308, 820, 180, 692, 436, 948, 116, 628, 372, 884, 244, 756, 500, 1012,
    12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332, 844, 204, 716, 460, 972,
    44, 556, 300, 812, 172, 684, 428, 940, 108, 620, 364, 876, 236, 748, 492, 1004,
    28, 540, 284, 796, 156, 668, 412, 924, 92, 604, 348, 860, 220, 732, 476, 988,
    60, 572, 316, 828, 188, 700, 444, 956, 124, 636, 380, 892, 252, 764, 508, 1020,
    2, 514, 258, 770, 130, 642, 386, 898, 66, 578, 322, 834, 194, 706, 450, 962,
    34, 546, 290, 802, 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994,
    18, 530, 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, 210, 722, 466, 978,
    50, 562, 306, 818, 178, 690, 434, 946, 114, 626, 370, 882, 242, 754, 498, 1010,
    10, 522, 266, 778, 138, 650, 394, 906, 74, 586, 330, 842, 202, 714, 458, 970,
    42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362, 874, 234, 746, 490, 1002,
    26, 538, 282, 794, 154, 666, 410, 922, 90, 602, 346, 858, 218, 730, 474, 986,
    58, 570, 314, 826, 186, 698, 442, 954, 122, 634, 378, 890, 250, 762, 506, 1018,
    6, 518, 262, 774, 134, 646, 390, 902, 70, 582, 326, 838, 198, 710, 454, 966,
    38, 550, 294, 806, 166, 678, 422, 934, 102, 614, 358, 870, 230, 742, 486, 998,
    22, 534, 278, 790, 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982,
    54, 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, 246, 758, 502, 1014,
    14, 526, 270, 782, 142, 654, 398, 910, 78, 590, 334, 846, 206, 718, 462, 974,
    46, 558, 302, 814, 174, 686, 430, 942, 110, 622, 366, 878, 238, 750, 494, 1006,
    30, 542, 286, 798, 158, 670, 414, 926, 94, 606, 350, 862, 222, 734, 478, 990,
    62, 574, 318, 830, 190, 702, 446, 958, 126, 638, 382, 894, 254, 766, 510, 1022,
    1, 513, 257, 769, 129, 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961,
    33, 545, 289, 801, 161, 673, 417, 929, 97, 609, 353, 865, 225, 737, 481, 993,
    17, 529, 273, 785, 145, 657, 401, 913, 81, 593, 337, 849, 209, 721, 465, 977,
    49, 561, 305, 817, 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009,
    9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, 201, 713, 457, 969,
    41, 553, 297, 809, 169, 681, 425, 937, 105, 617, 361, 873, 233, 745, 489, 1001,
    25, 537, 281, 793, 153, 665, 409, 921, 89, 601, 345, 857, 217, 729, 473, 985,
    57, 569, 313, 825, 185, 697, 441, 953, 121, 633, 377, 889, 249, 761, 505, 1017,
    5, 517, 261, 773, 133, 645, 389, 901, 69, 581, 325, 837, 197, 709, 453, 965,
    37, 549, 293, 805, 165, 677, 421, 933, 101, 613, 357, 869, 229, 741, 485, 997,
    21, 533, 277, 789, 149, 661, 405, 917, 85, 597, 341, 853, 213, 725, 469, 981,
    53, 565, 309, 821, 181, 693, 437, 949, 117, 629, 373, 885, 245, 757, 501, 1013,
    13, 525, 269, 781, 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461, 973,
    45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, 237, 749, 493, 1005,
    29, 541, 285, 797, 157, 669, 413, 925, 93, 605, 349, 861, 221, 733, 477, 989,
    61, 573, 317, 829, 189, 701, 445, 957, 125, 637, 381, 893, 253, 765, 509, 1021,
    3, 515, 259, 771, 131, 643, 387, 899, 67, 579, 323, 835, 195, 707, 451, 963,
    35, 547, 291, 803, 163, 675, 419, 931, 99, 611, 355, 867, 227, 739, 483, 995,
    19, 531, 275, 787, 147, 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979,
    51, 563, 307, 819, 179, 691, 435, 947, 115, 627, 371, 883, 243, 755, 499, 1011,
    11, 523, 267, 779, 139, 651, 395, 907, 75, 587, 331, 843, 203, 715, 459, 971,
    43, 555, 299, 811, 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491, 1003,
    27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, 219, 731, 475, 987,
    59, 571, 315, 827, 187, 699, 443, 955, 123, 635, 379, 891, 251, 763, 507, 1019,
    7, 519, 263, 775, 135, 647, 391, 903, 71, 583, 327, 839, 199, 711, 455, 967,
    39, 551, 295, 807, 167, 679, 423, 935, 103, 615, 359, 871, 231, 743, 487, 999,
    23, 535, 279, 791, 151, 663, 407, 919, 87, 599, 343, 855, 215, 727, 471, 983,
    55, 567, 311, 823, 183, 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015,
    15, 527, 271, 783, 143, 655, 399, 911, 79, 591, 335, 847, 207, 719, 463, 975,
    47, 559, 303, 815, 175, 687, 431, 943, 111, 623, 367, 879, 239, 751, 495, 1007,
    31, 543, 287, 799, 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479, 991,
    63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, 255, 767, 511, 1023
};

/*
 * Compute the roots for NTT and inverse NTT (binary case). Input
 * parameter g is a primitive 2048-th root of 1 modulo p (i.e. g^1024 =
 * -1 mod p). This fills gm[] and igm[] with powers of g and 1/g:
 *   gm[rev(i)] = g^i mod p
 *   igm[rev(i)] = (1/g)^i mod p
 * where rev() is the "bit reversal" function over 10 bits. It fills
 * the arrays only up to N = 2^logn values.
 *
 * The values stored in gm[] and igm[] are in Montgomery representation.
 *
 * p must be a prime such that p = 1 mod 2048.
 */
+ */ +static void +modp_mkgm2(uint32_t *gm, uint32_t *igm, unsigned logn, + uint32_t g, uint32_t p, uint32_t p0i) { + size_t u, n; + unsigned k; + uint32_t ig, x1, x2, R2; + + n = (size_t)1 << logn; + + /* + * We want g such that g^(2N) = 1 mod p, but the provided + * generator has order 2048. We must square it a few times. + */ + R2 = modp_R2(p, p0i); + g = modp_montymul(g, R2, p, p0i); + for (k = logn; k < 10; k ++) { + g = modp_montymul(g, g, p, p0i); + } + + ig = modp_div(R2, g, p, p0i, modp_R(p)); + k = 10 - logn; + x1 = x2 = modp_R(p); + for (u = 0; u < n; u ++) { + size_t v; + + v = REV10[u << k]; + gm[v] = x1; + igm[v] = x2; + x1 = modp_montymul(x1, g, p, p0i); + x2 = modp_montymul(x2, ig, p, p0i); + } +} + +/* + * Compute the NTT over a polynomial (binary case). Polynomial elements + * are a[0], a[stride], a[2 * stride]... + */ +static void +modp_NTT2_ext(uint32_t *a, size_t stride, const uint32_t *gm, unsigned logn, + uint32_t p, uint32_t p0i) { + size_t t, m, n; + + if (logn == 0) { + return; + } + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, u, v1; + + ht = t >> 1; + for (u = 0, v1 = 0; u < m; u ++, v1 += t) { + uint32_t s; + size_t v; + uint32_t *r1, *r2; + + s = gm[m + u]; + r1 = a + v1 * stride; + r2 = r1 + ht * stride; + for (v = 0; v < ht; v ++, r1 += stride, r2 += stride) { + uint32_t x, y; + + x = *r1; + y = modp_montymul(*r2, s, p, p0i); + *r1 = modp_add(x, y, p); + *r2 = modp_sub(x, y, p); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT over a polynomial (binary case). 
+ */ +static void +modp_iNTT2_ext(uint32_t *a, size_t stride, const uint32_t *igm, unsigned logn, + uint32_t p, uint32_t p0i) { + size_t t, m, n, k; + uint32_t ni; + uint32_t *r; + + if (logn == 0) { + return; + } + n = (size_t)1 << logn; + t = 1; + for (m = n; m > 1; m >>= 1) { + size_t hm, dt, u, v1; + + hm = m >> 1; + dt = t << 1; + for (u = 0, v1 = 0; u < hm; u ++, v1 += dt) { + uint32_t s; + size_t v; + uint32_t *r1, *r2; + + s = igm[hm + u]; + r1 = a + v1 * stride; + r2 = r1 + t * stride; + for (v = 0; v < t; v ++, r1 += stride, r2 += stride) { + uint32_t x, y; + + x = *r1; + y = *r2; + *r1 = modp_add(x, y, p); + *r2 = modp_montymul( + modp_sub(x, y, p), s, p, p0i);; + } + } + t = dt; + } + + /* + * We need 1/n in Montgomery representation, i.e. R/n. Since + * 1 <= logn <= 10, R/n is an integer; moreover, R/n <= 2^30 < p, + * thus a simple shift will do. + */ + ni = (uint32_t)1 << (31 - logn); + for (k = 0, r = a; k < n; k ++, r += stride) { + *r = modp_montymul(*r, ni, p, p0i); + } +} + +/* + * Simplified macros for NTT and iNTT (binary case) when the elements + * are consecutive in RAM (i.e. the stride is 1). + */ +#define modp_NTT2(a, gm, logn, p, p0i) modp_NTT2_ext(a, 1, gm, logn, p, p0i) +#define modp_iNTT2(a, igm, logn, p, p0i) modp_iNTT2_ext(a, 1, igm, logn, p, p0i) + +/* + * Given polynomial f in NTT representation modulo p, compute f' of degree + * less than N/2 such that f' = f0^2 - X*f1^2, where f0 and f1 are + * polynomials of degree less than N/2 such that f = f0(X^2) + X*f1(X^2). + * + * The new polynomial is written "in place" over the first N/2 elements + * of f. + * + * If applied logn times successively on a given polynomial, the resulting + * degree-0 polynomial is the resultant of f and X^N+1 modulo p. + * + * This function applies only to the binary case; it is invoked from + * solve_NTRU_binary_depth1().
+ */ +static void +modp_poly_rec_res(uint32_t *f, unsigned logn, + uint32_t p, uint32_t p0i, uint32_t R2) { + size_t hn, u; + + hn = (size_t)1 << (logn - 1); + for (u = 0; u < hn; u ++) { + uint32_t w0, w1; + + w0 = f[(u << 1) + 0]; + w1 = f[(u << 1) + 1]; + f[u] = modp_montymul(modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } +} + +/* ==================================================================== */ +/* + * Custom bignum implementation. + * + * This is a very reduced set of functionalities. We need to do the + * following operations: + * + * - Rebuild the resultant and the polynomial coefficients from their + * values modulo small primes (of length 31 bits each). + * + * - Compute an extended GCD between the two computed resultants. + * + * - Extract top bits and add scaled values during the successive steps + * of Babai rounding. + * + * When rebuilding values using CRT, we must also recompute the product + * of the small prime factors. We always do it one small factor at a + * time, so the "complicated" operations can be done modulo the small + * prime with the modp_* functions. CRT coefficients (inverses) are + * precomputed. + * + * All values are positive until the last step: when the polynomial + * coefficients have been rebuilt, we normalize them around 0. But then, + * only additions and subtractions on the upper few bits are needed + * afterwards. + * + * We keep big integers as arrays of 31-bit words (in uint32_t values); + * the top bit of each uint32_t is kept equal to 0. Using 31-bit words + * makes it easier to keep track of carries. When negative values are + * used, two's complement is used. + */ + +/* + * Subtract integer b from integer a. Both integers are supposed to have + * the same size. The carry (0 or 1) is returned. Source arrays a and b + * MUST be distinct. + * + * The operation is performed as described above if ctl = 1.
If + * ctl = 0, the value a[] is unmodified, but all memory accesses are + * still performed, and the carry is computed and returned. + */ +static uint32_t +zint_sub(uint32_t *a, const uint32_t *b, size_t len, + uint32_t ctl) { + size_t u; + uint32_t cc, m; + + cc = 0; + m = -ctl; + for (u = 0; u < len; u ++) { + uint32_t aw, w; + + aw = a[u]; + w = aw - b[u] - cc; + cc = w >> 31; + aw ^= ((w & 0x7FFFFFFF) ^ aw) & m; + a[u] = aw; + } + return cc; +} + +/* + * Multiply the provided big integer m with a small value x. + * This function assumes that x < 2^31. The carry word is returned. + */ +static uint32_t +zint_mul_small(uint32_t *m, size_t mlen, uint32_t x) { + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < mlen; u ++) { + uint64_t z; + + z = (uint64_t)m[u] * (uint64_t)x + cc; + m[u] = (uint32_t)z & 0x7FFFFFFF; + cc = (uint32_t)(z >> 31); + } + return cc; +} + +/* + * Reduce a big integer d modulo a small integer p. + * Rules: + * d is unsigned + * p is prime + * 2^30 < p < 2^31 + * p0i = -(1/p) mod 2^31 + * R2 = 2^62 mod p + * If dlen is 0, then 0 is returned. + */ +static uint32_t +zint_mod_small_unsigned(const uint32_t *d, size_t dlen, + uint32_t p, uint32_t p0i, uint32_t R2) { + uint32_t x; + size_t u; + + /* + * Algorithm: we inject words one by one, starting with the high + * word. Each step is: + * - multiply x by 2^31 + * - add new word + */ + x = 0; + u = dlen; + while (u -- > 0) { + uint32_t w; + + x = modp_montymul(x, R2, p, p0i); + w = d[u] - p; + w += p & -(w >> 31); + x = modp_add(x, w, p); + } + return x; +} + +/* + * Similar to zint_mod_small_unsigned(), except that d may be signed. + * Extra parameter is Rx = 2^(31*dlen) mod p. + */ +static uint32_t +zint_mod_small_signed(const uint32_t *d, size_t dlen, + uint32_t p, uint32_t p0i, uint32_t R2, uint32_t Rx) { + uint32_t z; + + if (dlen == 0) { + return 0; + } + z = zint_mod_small_unsigned(d, dlen, p, p0i, R2); + z = modp_sub(z, Rx & -(d[dlen - 1] >> 30), p); + return z; +} + +/* + * Add y*s to x.
x and y initially have length 'len' words; the new x + * has length 'len+1' words. 's' must fit on 31 bits. x[] and y[] must + * not overlap. + */ +static void +zint_add_mul_small(uint32_t *x, + const uint32_t *y, size_t len, uint32_t s) { + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < len; u ++) { + uint32_t xw, yw; + uint64_t z; + + xw = x[u]; + yw = y[u]; + z = (uint64_t)yw * (uint64_t)s + (uint64_t)xw + (uint64_t)cc; + x[u] = (uint32_t)z & 0x7FFFFFFF; + cc = (uint32_t)(z >> 31); + } + x[len] = cc; +} + +/* + * Normalize a modular integer around 0: if x > p/2, then x is replaced + * with x - p (signed encoding with two's complement); otherwise, x is + * untouched. The two integers x and p are encoded over the same length. + */ +static void +zint_norm_zero(uint32_t *x, const uint32_t *p, size_t len) { + size_t u; + uint32_t r, bb; + + /* + * Compare x with p/2. We use the shifted version of p, and p + * is odd, so we really compare with (p-1)/2; we want to perform + * the subtraction if and only if x > (p-1)/2. + */ + r = 0; + bb = 0; + u = len; + while (u -- > 0) { + uint32_t wx, wp, cc; + + /* + * Get the two words to compare in wx and wp (both over + * 31 bits exactly). + */ + wx = x[u]; + wp = (p[u] >> 1) | (bb << 30); + bb = p[u] & 1; + + /* + * We set cc to -1, 0 or 1, depending on whether wp is + * lower than, equal to, or greater than wx. + */ + cc = wp - wx; + cc = ((-cc) >> 31) | -(cc >> 31); + + /* + * If r != 0 then it is either 1 or -1, and we keep its + * value. Otherwise, if r = 0, then we replace it with cc. + */ + r |= cc & ((r & 1) - 1); + } + + /* + * At this point, r = -1, 0 or 1, depending on whether (p-1)/2 + * is lower than, equal to, or greater than x. We thus want to + * do the subtraction only if r = -1. + */ + zint_sub(x, p, len, r >> 31); +} + +/* + * Rebuild integers from their RNS representation. There are 'num' + * integers, and each consists of 'xlen' words.
'xx' points at the + * first word of the first integer; subsequent integers are accessed + * by adding 'xstride' repeatedly. + * + * The words of an integer are the RNS representation of that integer, + * using the provided 'primes' as moduli. This function replaces + * each integer with its multi-word value (little-endian order). + * + * If "normalize_signed" is non-zero, then the returned value is + * normalized to the -m/2..m/2 interval (where m is the product of all + * small prime moduli); two's complement is used for negative values. + * + * The tmp[] array receives the product of the moduli; 'xlen' words of + * it are written. + */ +static void +zint_rebuild_CRT(uint32_t *xx, size_t xlen, size_t xstride, + size_t num, const small_prime *primes, int normalize_signed, + uint32_t *tmp) { + size_t u; + uint32_t *x; + + tmp[0] = primes[0].p; + for (u = 1; u < xlen; u ++) { + /* + * At the entry of each loop iteration: + * - the first u words of each array have been + * reassembled; + * - the first u words of tmp[] contains the + * product of the prime moduli processed so far. + * + * We call 'q' the product of all previous primes. + */ + uint32_t p, p0i, s, R2; + size_t v; + + p = primes[u].p; + s = primes[u].s; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + for (v = 0, x = xx; v < num; v ++, x += xstride) { + uint32_t xp, xq, xr; + /* + * xp = the integer x modulo the prime p for this + * iteration + * xq = (x mod q) mod p + */ + xp = x[u]; + xq = zint_mod_small_unsigned(x, u, p, p0i, R2); + + /* + * New value is (x mod q) + q * (s * (xp - xq) mod p) + */ + xr = modp_montymul(s, modp_sub(xp, xq, p), p, p0i); + zint_add_mul_small(x, tmp, u, xr); + } + + /* + * Update product of primes in tmp[]. + */ + tmp[u] = zint_mul_small(tmp, u, p); + } + + /* + * Normalize the reconstructed values around 0. + */ + if (normalize_signed) { + for (u = 0, x = xx; u < num; u ++, x += xstride) { + zint_norm_zero(x, tmp, xlen); + } + } +} + +/* + * Negate a big integer conditionally: value a is replaced with -a if + * and only if ctl = 1.
Control value ctl must be 0 or 1. + */ +static void +zint_negate(uint32_t *a, size_t len, uint32_t ctl) { + size_t u; + uint32_t cc, m; + + /* + * If ctl = 1 then we flip the bits of a by XORing with + * 0x7FFFFFFF, and we add 1 to the value. If ctl = 0 then we XOR + * with 0 and add 0, which leaves the value unchanged. + */ + cc = ctl; + m = -ctl >> 1; + for (u = 0; u < len; u ++) { + uint32_t aw; + + aw = a[u]; + aw = (aw ^ m) + cc; + a[u] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Replace a with (a*xa+b*xb)/(2^31) and b with (a*ya+b*yb)/(2^31). + * The low bits are dropped (the caller should compute the coefficients + * such that these dropped bits are all zeros). If either or both + * yields a negative value, then the value is negated. + * + * Returned value is: + * 0 both values were positive + * 1 new a had to be negated + * 2 new b had to be negated + * 3 both new a and new b had to be negated + * + * Coefficients xa, xb, ya and yb may use the full signed 32-bit range. + * + * Since a[len - 1] and b[len - 1] are written unconditionally, len is + * expected to be at least 1. + */ +static uint32_t +zint_co_reduce(uint32_t *a, uint32_t *b, size_t len, + int64_t xa, int64_t xb, int64_t ya, int64_t yb) { + size_t u; + int64_t cca, ccb; + uint32_t nega, negb; + + cca = 0; + ccb = 0; + for (u = 0; u < len; u ++) { + uint32_t wa, wb; + uint64_t za, zb; + + wa = a[u]; + wb = b[u]; + za = wa * (uint64_t)xa + wb * (uint64_t)xb + (uint64_t)cca; + zb = wa * (uint64_t)ya + wb * (uint64_t)yb + (uint64_t)ccb; + if (u > 0) { + a[u - 1] = (uint32_t)za & 0x7FFFFFFF; + b[u - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + cca = *(int64_t *)&za >> 31; + ccb = *(int64_t *)&zb >> 31; + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + nega = (uint32_t)((uint64_t)cca >> 63); + negb = (uint32_t)((uint64_t)ccb >> 63); + zint_negate(a, len, nega); + zint_negate(b, len, negb); + return nega | (negb << 1); +} + +/* + * Finish modular reduction.
Rules on input parameters: + * + * if neg = 1, then -m <= a < 0 + * if neg = 0, then 0 <= a < 2*m + * + * If neg = 0, then the top word of a[] is allowed to use 32 bits. + * + * Modulus m must be odd. + */ +static void +zint_finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg) { + size_t u; + uint32_t cc, xm, ym; + + /* + * First pass: compare a (assumed nonnegative) with m. Note that + * if the top word uses 32 bits, subtracting m must yield a + * value less than 2^31 since a < 2*m. + */ + cc = 0; + for (u = 0; u < len; u ++) { + cc = (a[u] - m[u] - cc) >> 31; + } + + /* + * If neg = 1 then we must add m (regardless of cc) + * If neg = 0 and cc = 0 then we must subtract m + * If neg = 0 and cc = 1 then we must do nothing + * + * In the loop below, we conditionally subtract either m or -m + * from a. Word xm is a word of m (if neg = 0) or -m (if neg = 1); + * but if neg = 0 and cc = 1, then ym = 0 and it forces mw to 0. + */ + xm = -neg >> 1; + ym = -(neg | (1 - cc)); + cc = neg; + for (u = 0; u < len; u ++) { + uint32_t aw, mw; + + aw = a[u]; + mw = (m[u] ^ xm) & ym; + aw = aw - mw - cc; + a[u] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Replace a with (a*xa+b*xb)/(2^31) mod m, and b with + * (a*ya+b*yb)/(2^31) mod m. Modulus m must be odd; m0i = -1/m[0] mod 2^31. + */ +static void +zint_co_reduce_mod(uint32_t *a, uint32_t *b, const uint32_t *m, size_t len, + uint32_t m0i, int64_t xa, int64_t xb, int64_t ya, int64_t yb) { + size_t u; + int64_t cca, ccb; + uint32_t fa, fb; + + /* + * These are actually four combined Montgomery multiplications.
+ */ + cca = 0; + ccb = 0; + fa = ((a[0] * (uint32_t)xa + b[0] * (uint32_t)xb) * m0i) & 0x7FFFFFFF; + fb = ((a[0] * (uint32_t)ya + b[0] * (uint32_t)yb) * m0i) & 0x7FFFFFFF; + for (u = 0; u < len; u ++) { + uint32_t wa, wb; + uint64_t za, zb; + + wa = a[u]; + wb = b[u]; + za = wa * (uint64_t)xa + wb * (uint64_t)xb + + m[u] * (uint64_t)fa + (uint64_t)cca; + zb = wa * (uint64_t)ya + wb * (uint64_t)yb + + m[u] * (uint64_t)fb + (uint64_t)ccb; + if (u > 0) { + a[u - 1] = (uint32_t)za & 0x7FFFFFFF; + b[u - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + cca = *(int64_t *)&za >> 31; + ccb = *(int64_t *)&zb >> 31; + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + /* + * At this point: + * -m <= a < 2*m + * -m <= b < 2*m + * (this is a case of Montgomery reduction) + * The top words of 'a' and 'b' may have their 32nd bit set. + * We want to add or subtract the modulus, as required. + */ + zint_finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63)); + zint_finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63)); +} + +/* + * Compute a GCD between two positive big integers x and y. The two + * integers must be odd. Returned value is 1 if the GCD is 1, 0 + * otherwise. When 1 is returned, arrays u and v are filled with values + * such that: + * 0 <= u <= y + * 0 <= v <= x + * x*u - y*v = 1 + * x[] and y[] are unmodified. Both input values must have the same + * encoded length. Temporary array must be large enough to accommodate 4 + * extra values of that length. Arrays u, v and tmp may not overlap with + * each other, or with either x or y. + */ +static int +zint_bezout(uint32_t *u, uint32_t *v, + const uint32_t *x, const uint32_t *y, + size_t len, uint32_t *tmp) { + /* + * Algorithm is an extended binary GCD.
We maintain 6 values + * a, b, u0, u1, v0 and v1 with the following invariants: + * + * a = x*u0 - y*v0 + * b = x*u1 - y*v1 + * 0 <= a <= x + * 0 <= b <= y + * 0 <= u0 < y + * 0 <= v0 < x + * 0 <= u1 <= y + * 0 <= v1 < x + * + * Initial values are: + * + * a = x u0 = 1 v0 = 0 + * b = y u1 = y v1 = x-1 + * + * Each iteration reduces either a or b, and maintains the + * invariants. Algorithm stops when a = b, at which point their + * common value is GCD(a,b) and (u0,v0) (or (u1,v1)) contains + * the values (u,v) we want to return. + * + * The formal definition of the algorithm is a sequence of steps: + * + * - If a is even, then: + * a <- a/2 + * u0 <- u0/2 mod y + * v0 <- v0/2 mod x + * + * - Otherwise, if b is even, then: + * b <- b/2 + * u1 <- u1/2 mod y + * v1 <- v1/2 mod x + * + * - Otherwise, if a > b, then: + * a <- (a-b)/2 + * u0 <- (u0-u1)/2 mod y + * v0 <- (v0-v1)/2 mod x + * + * - Otherwise: + * b <- (b-a)/2 + * u1 <- (u1-u0)/2 mod y + * v1 <- (v1-v0)/2 mod x + * + * We can show that the operations above preserve the invariants: + * + * - If a is even, then u0 and v0 are either both even or both + * odd (since a = x*u0 - y*v0, and x and y are both odd). + * If u0 and v0 are both even, then (u0,v0) <- (u0/2,v0/2). + * Otherwise, (u0,v0) <- ((u0+y)/2,(v0+x)/2). Either way, + * the a = x*u0 - y*v0 invariant is preserved. + * + * - The same holds for the case where b is even. + * + * - If a and b are odd, and a > b, then: + * + * a-b = x*(u0-u1) - y*(v0-v1) + * + * In that situation, if u0 < u1, then x*(u0-u1) < 0, but + * a-b > 0; therefore, it must be that v0 < v1, and the + * first part of the update is: (u0,v0) <- (u0-u1+y,v0-v1+x), + * which preserves the invariants. Otherwise, if u0 > u1, + * then u0-u1 >= 1, thus x*(u0-u1) >= x. But a <= x and + * b >= 0, hence a-b <= x. It follows that, in that case, + * v0-v1 >= 0. The first part of the update is then: + * (u0,v0) <- (u0-u1,v0-v1), which again preserves the + * invariants.
+ * + * Either way, once the subtraction is done, the new value of + * a, which is the difference of two odd values, is even, + * and the remainder of this step is a subcase of the + * first algorithm case (i.e. when a is even). + * + * - If a and b are odd, and b > a, then a similar + * argument holds. + * + * The values a and b start at x and y, respectively. Since x + * and y are odd, their GCD is odd, and it is easily seen that + * all steps conserve the GCD (GCD(a-b,b) = GCD(a, b); + * GCD(a/2,b) = GCD(a,b) if GCD(a,b) is odd). Moreover, either a + * or b is reduced by at least one bit at each iteration, so + * the algorithm necessarily converges on the case a = b, at + * which point the common value is the GCD. + * + * In the algorithm expressed above, when a = b, the fourth case + * applies, and sets b = 0. Since a contains the GCD of x and y, + * which are both odd, a must be odd, and subsequent iterations + * (if any) will simply divide b by 2 repeatedly, which has no + * consequence. Thus, the algorithm can run for more iterations + * than necessary; the final GCD will be in a, and the (u,v) + * coefficients will be (u0,v0). + * + * + * The presentation above is bit-by-bit. It can be sped up by + * noticing that all decisions are taken based on the low bits + * and high bits of a and b. We can extract the two top words + * and low word of each of a and b, and compute reduction + * parameters pa, pb, qa and qb such that the new values for + * a and b are: + * a' = (a*pa + b*pb) / (2^31) + * b' = (a*qa + b*qb) / (2^31) + * the two divisions being exact. The coefficients are obtained + * just from the extracted words, and may be slightly off, requiring + * an optional correction: if a' < 0, then we replace pa with -pa + * and pb with -pb. Each such step will reduce the total length + * (sum of lengths of a and b) by at least 30 bits at each + * iteration.
+ */ + uint32_t *u0, *u1, *v0, *v1, *a, *b; + uint32_t x0i, y0i; + uint32_t num, rc; + size_t j; + + if (len == 0) { + return 0; + } + + /* + * u0 and v0 are the u and v result buffers; the four other + * values (u1, v1, a and b) are taken from tmp[]. + */ + u0 = u; + v0 = v; + u1 = tmp; + v1 = u1 + len; + a = v1 + len; + b = a + len; + + /* + * We'll need the Montgomery reduction coefficients. + */ + x0i = modp_ninv31(x[0]); + y0i = modp_ninv31(y[0]); + + /* + * Initialize a, b, u0, u1, v0 and v1. + * a = x u0 = 1 v0 = 0 + * b = y u1 = y v1 = x-1 + * Note that x is odd, so computing x-1 is easy. + */ + memcpy(a, x, len * sizeof * x); + memcpy(b, y, len * sizeof * y); + u0[0] = 1; + memset(u0 + 1, 0, (len - 1) * sizeof * u0); + memset(v0, 0, len * sizeof * v0); + memcpy(u1, y, len * sizeof * u1); + memcpy(v1, x, len * sizeof * v1); + v1[0] --; + + /* + * Each input operand may be as large as 31*len bits, and we + * reduce the total length by at least 30 bits at each iteration. + * The iteration count below thus depends only on len. + */ + for (num = 62 * (uint32_t)len + 30; num >= 30; num -= 30) { + uint32_t c0, c1; + uint32_t a0, a1, b0, b1; + uint64_t a_hi, b_hi; + uint32_t a_lo, b_lo; + int64_t pa, pb, qa, qb; + int i; + uint32_t r; + + /* + * Extract the top words of a and b. If j is the highest + * index >= 1 such that a[j] != 0 or b[j] != 0, then we + * want (a[j] << 31) + a[j-1] and (b[j] << 31) + b[j-1]. + * If a and b are down to one word each, then we use + * a[0] and b[0]. + */ + c0 = (uint32_t) -1; + c1 = (uint32_t) -1; + a0 = 0; + a1 = 0; + b0 = 0; + b1 = 0; + j = len; + while (j -- > 0) { + uint32_t aw, bw; + + aw = a[j]; + bw = b[j]; + a0 ^= (a0 ^ aw) & c0; + a1 ^= (a1 ^ aw) & c1; + b0 ^= (b0 ^ bw) & c0; + b1 ^= (b1 ^ bw) & c1; + c1 = c0; + c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1; + } + + /* + * If c1 = 0, then we grabbed two words for a and b. + * If c1 != 0 but c0 = 0, then we grabbed one word.
It + * is not possible that c1 != 0 and c0 != 0, because that + * would mean that both integers are zero. + */ + a1 |= a0 & c1; + a0 &= ~c1; + b1 |= b0 & c1; + b0 &= ~c1; + a_hi = ((uint64_t)a0 << 31) + a1; + b_hi = ((uint64_t)b0 << 31) + b1; + a_lo = a[0]; + b_lo = b[0]; + + /* + * Compute reduction factors: + * + * a' = a*pa + b*pb + * b' = a*qa + b*qb + * + * such that a' and b' are both multiples of 2^31, but are + * only marginally larger than a and b. + */ + pa = 1; + pb = 0; + qa = 0; + qb = 1; + for (i = 0; i < 31; i ++) { + /* + * At each iteration: + * + * a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi + * b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi + * a <- a/2 if: a is even + * b <- b/2 if: a is odd, b is even + * + * We multiply a_lo and b_lo by 2 at each + * iteration, thus a division by 2 really is a + * non-multiplication by 2. + */ + uint32_t rt, oa, ob, cAB, cBA, cA; + uint64_t rz; + + /* + * rt = 1 if a_hi > b_hi, 0 otherwise. + */ + rz = b_hi - a_hi; + rt = (uint32_t)((rz ^ ((a_hi ^ b_hi) + & (a_hi ^ rz))) >> 63); + + /* + * cAB = 1 if b must be subtracted from a + * cBA = 1 if a must be subtracted from b + * cA = 1 if a must be divided by 2 + * + * Rules: + * + * cAB and cBA cannot both be 1. + * If a is not divided by 2, b is. + */ + oa = (a_lo >> i) & 1; + ob = (b_lo >> i) & 1; + cAB = oa & ob & rt; + cBA = oa & ob & ~rt; + cA = cAB | (oa ^ 1); + + /* + * Conditional subtractions. + */ + a_lo -= b_lo & -cAB; + a_hi -= b_hi & -(uint64_t)cAB; + pa -= qa & -(int64_t)cAB; + pb -= qb & -(int64_t)cAB; + b_lo -= a_lo & -cBA; + b_hi -= a_hi & -(uint64_t)cBA; + qa -= pa & -(int64_t)cBA; + qb -= pb & -(int64_t)cBA; + + /* + * Shifting.
+ */ + a_lo += a_lo & (cA - 1); + pa += pa & ((int64_t)cA - 1); + pb += pb & ((int64_t)cA - 1); + a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA; + b_lo += b_lo & -cA; + qa += qa & -(int64_t)cA; + qb += qb & -(int64_t)cA; + b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1); + } + + /* + * Apply the computed parameters to our values. We + * may have to correct pa and pb depending on the + * returned value of zint_co_reduce() (when a and/or b + * had to be negated). + */ + r = zint_co_reduce(a, b, len, pa, pb, qa, qb); + pa -= (pa + pa) & -(int64_t)(r & 1); + pb -= (pb + pb) & -(int64_t)(r & 1); + qa -= (qa + qa) & -(int64_t)(r >> 1); + qb -= (qb + qb) & -(int64_t)(r >> 1); + zint_co_reduce_mod(u0, u1, y, len, y0i, pa, pb, qa, qb); + zint_co_reduce_mod(v0, v1, x, len, x0i, pa, pb, qa, qb); + } + + /* + * At that point, array a[] should contain the GCD, and the + * results (u,v) should already be set. We check that the GCD + * is indeed 1. We also check that the two operands x and y + * are odd. + */ + rc = a[0] ^ 1; + for (j = 1; j < len; j ++) { + rc |= a[j]; + } + return (int)((1 - ((rc | -rc) >> 31)) & x[0] & y[0]); +} + +/* + * Add k*y*2^sc to x. The result is assumed to fit in the array of + * size xlen (truncation is applied if necessary). + * Scale factor 'sc' is provided as sch and scl, such that: + * sch = sc / 31 + * scl = sc % 31 + * xlen MUST NOT be lower than ylen. + * + * x[] and y[] are both signed integers, using two's complement for + * negative values. + * + * Words of x[] at indices below sch are not modified. + */ +static void +zint_add_scaled_mul_small(uint32_t *x, size_t xlen, + const uint32_t *y, size_t ylen, int32_t k, + uint32_t sch, uint32_t scl) { + size_t u; + uint32_t ysign, tw; + int32_t cc; + + if (ylen == 0) { + return; + } + + ysign = -(y[ylen - 1] >> 30) >> 1; + tw = 0; + cc = 0; + for (u = sch; u < xlen; u ++) { + size_t v; + uint32_t wy, wys, ccu; + uint64_t z; + + /* + * Get the next word of y (scaled).
+ */ + v = u - sch; + if (v < ylen) { + wy = y[v]; + } else { + wy = ysign; + } + wys = ((wy << scl) & 0x7FFFFFFF) | tw; + tw = wy >> (31 - scl); + + /* + * The expression below does not overflow. + */ + z = (uint64_t)((int64_t)wys * (int64_t)k + (int64_t)x[u] + cc); + x[u] = (uint32_t)z & 0x7FFFFFFF; + + /* + * Right-shifting the signed value z would yield + * implementation-defined results (arithmetic shift is + * not guaranteed). However, we can cast to unsigned, + * and get the next carry as an unsigned word. We can + * then convert it back to signed by using the guaranteed + * fact that 'int32_t' uses two's complement with no + * trap representation or padding bit, and with a layout + * compatible with that of 'uint32_t'. + */ + ccu = (uint32_t)(z >> 31); + cc = *(int32_t *)&ccu; + } +} + +/* + * Subtract y*2^sc from x. The result is assumed to fit in the array of + * size xlen (truncation is applied if necessary). + * Scale factor 'sc' is provided as sch and scl, such that: + * sch = sc / 31 + * scl = sc % 31 + * xlen MUST NOT be lower than ylen. + * + * x[] and y[] are both signed integers, using two's complement for + * negative values. + * + * Words of x[] at indices below sch are not modified. + */ +static void +zint_sub_scaled(uint32_t *x, size_t xlen, + const uint32_t *y, size_t ylen, uint32_t sch, uint32_t scl) { + size_t u; + uint32_t ysign, tw; + uint32_t cc; + + if (ylen == 0) { + return; + } + + ysign = -(y[ylen - 1] >> 30) >> 1; + tw = 0; + cc = 0; + for (u = sch; u < xlen; u ++) { + size_t v; + uint32_t w, wy, wys; + + /* + * Get the next word of y (scaled). + */ + v = u - sch; + if (v < ylen) { + wy = y[v]; + } else { + wy = ysign; + } + wys = ((wy << scl) & 0x7FFFFFFF) | tw; + tw = wy >> (31 - scl); + + w = x[u] - wys - cc; + x[u] = w & 0x7FFFFFFF; + cc = w >> 31; + } +} + +/* + * Convert a one-word signed big integer into a signed value.
+ */ +static inline int32_t +zint_one_to_plain(const uint32_t *x) { + uint32_t w; + + w = x[0]; + w |= (w & 0x40000000) << 1; + return *(int32_t *)&w; +} + +/* ==================================================================== */ + +/* + * Convert a polynomial to floating-point values. + * + * Each coefficient has length flen words, and starts fstride words after + * the previous. If flen is 0, then all output values are set to zero. + * + * IEEE-754 binary64 values can represent values in a finite range, + * roughly 2^(-1023) to 2^(+1023); thus, if coefficients are too large, + * they should be "trimmed" by pointing not to the lowest word of each, + * but to a higher word. + */ +static void +poly_big_to_fp(fpr *d, const uint32_t *f, size_t flen, size_t fstride, + unsigned logn) { + size_t n, u; + + n = MKN(logn); + if (flen == 0) { + for (u = 0; u < n; u ++) { + d[u] = fpr_zero; + } + return; + } + for (u = 0; u < n; u ++, f += fstride) { + size_t v; + uint32_t neg, cc, xm; + fpr x, fsc; + + /* + * Get sign of the integer; if it is negative, then we + * will load its absolute value instead, and negate the + * result. + */ + neg = -(f[flen - 1] >> 30); + xm = neg >> 1; + cc = neg & 1; + x = fpr_zero; + fsc = fpr_one; + for (v = 0; v < flen; v ++, fsc = fpr_mul(fsc, fpr_ptwo31)) { + uint32_t w; + + w = (f[v] ^ xm) + cc; + cc = w >> 31; + w &= 0x7FFFFFFF; + w -= (w << 1) & neg; + x = fpr_add(x, fpr_mul(fpr_of(*(int32_t *)&w), fsc)); + } + d[u] = x; + } +} + +/* + * Convert a polynomial to small integers. Source values are supposed + * to be one-word integers, signed over 31 bits. Returned value is 0 + * if any of the coefficients exceeds the provided limit (in absolute + * value), or 1 on success. + * + * This is not constant-time; this is not a problem here, because on + * any failure, the NTRU-solving process will be deemed to have failed + * and the (f,g) polynomials will be discarded.
+ */ +static int +poly_big_to_small(int8_t *d, const uint32_t *s, int lim, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + int32_t z; + + z = zint_one_to_plain(s + u); + if (z < -lim || z > lim) { + return 0; + } + d[u] = (int8_t)z; + } + return 1; +} + +/* + * Subtract k*f from F, where F, f and k are polynomials modulo X^N+1. + * Coefficients of polynomial k are small integers (signed values in the + * -2^31..2^31 range) scaled by 2^sc. Value sc is provided as sch = sc / 31 + * and scl = sc % 31. + * + * This function implements the basic quadratic multiplication algorithm, + * which is efficient in space (no extra buffer needed) but slow at + * high degree. + * + * When the target index goes past N-1, it wraps around to coefficient 0 + * of F and the multiplier is negated (since X^N = -1 modulo X^N+1). + */ +static void +poly_sub_scaled(uint32_t *F, size_t Flen, size_t Fstride, + const uint32_t *f, size_t flen, size_t fstride, + const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + int32_t kf; + size_t v; + uint32_t *x; + const uint32_t *y; + + kf = -k[u]; + x = F + u * Fstride; + y = f; + for (v = 0; v < n; v ++) { + zint_add_scaled_mul_small( + x, Flen, y, flen, kf, sch, scl); + if (u + v == n - 1) { + x = F; + kf = -kf; + } else { + x += Fstride; + } + y += fstride; + } + } +} + +/* + * Subtract k*f from F. Coefficients of polynomial k are small integers + * (signed values in the -2^31..2^31 range) scaled by 2^sc. This function + * assumes that the degree is large, and integers relatively small. + * The value sc is provided as sch = sc / 31 and scl = sc % 31.
+ */ +static void +poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride, + const uint32_t *f, size_t flen, size_t fstride, + const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn, + uint32_t *tmp) { + uint32_t *gm, *igm, *fk, *t1, *x; + const uint32_t *y; + size_t n, u, tlen; + const small_prime *primes; + + n = MKN(logn); + tlen = flen + 1; + gm = tmp; + igm = gm + MKN(logn); + fk = igm + MKN(logn); + t1 = fk + n * tlen; + + primes = PRIMES; + + /* + * Compute k*f in fk[], in RNS notation. + */ + for (u = 0; u < tlen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)flen, p, p0i, R2); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + for (v = 0; v < n; v ++) { + t1[v] = modp_set(k[v], p); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, y = f, x = fk + u; + v < n; v ++, y += fstride, x += tlen) { + *x = zint_mod_small_signed(y, flen, p, p0i, R2, Rx); + } + modp_NTT2_ext(fk + u, tlen, gm, logn, p, p0i); + for (v = 0, x = fk + u; v < n; v ++, x += tlen) { + *x = modp_montymul( + modp_montymul(t1[v], *x, p, p0i), R2, p, p0i); + } + modp_iNTT2_ext(fk + u, tlen, igm, logn, p, p0i); + } + + /* + * Rebuild k*f from its RNS representation; the values are + * normalized around 0 (normalize_signed = 1). + */ + zint_rebuild_CRT(fk, tlen, tlen, n, primes, 1, t1); + + /* + * Subtract k*f, scaled, from F. + */ + for (u = 0, x = F, y = fk; u < n; u ++, x += Fstride, y += tlen) { + zint_sub_scaled(x, Flen, y, tlen, sch, scl); + } +} + +/* ==================================================================== */ + + +#define RNG_CONTEXT inner_shake256_context + +/* + * Get a random 8-byte integer from a SHAKE-based RNG. This function + * ensures consistent interpretation of the SHAKE output so that + * the same values will be obtained over different platforms, in case + * a known seed is used. + */ +static inline uint64_t +get_rng_u64(inner_shake256_context *rng) { + /* + * We enforce little-endian representation.
+ */ + + uint8_t tmp[8]; + + inner_shake256_extract(rng, tmp, sizeof tmp); + return (uint64_t)tmp[0] + | ((uint64_t)tmp[1] << 8) + | ((uint64_t)tmp[2] << 16) + | ((uint64_t)tmp[3] << 24) + | ((uint64_t)tmp[4] << 32) + | ((uint64_t)tmp[5] << 40) + | ((uint64_t)tmp[6] << 48) + | ((uint64_t)tmp[7] << 56); +} + +/* + * The table below encodes a discrete Gaussian distribution: + * D(x) = exp(-(x^2)/(2*sigma^2)) + * where sigma = 1.17*sqrt(q/(2*N)), q = 12289, and N = 1024. + * Element 0 of the table is P(x = 0). + * For k > 0, element k is P(x >= k+1 | x > 0). + * Probabilities are scaled up by 2^63. + */ +static const uint64_t gauss_1024_12289[] = { + 1283868770400643928u, 6416574995475331444u, 4078260278032692663u, + 2353523259288686585u, 1227179971273316331u, 575931623374121527u, + 242543240509105209u, 91437049221049666u, 30799446349977173u, + 9255276791179340u, 2478152334826140u, 590642893610164u, + 125206034929641u, 23590435911403u, 3948334035941u, + 586753615614u, 77391054539u, 9056793210u, + 940121950u, 86539696u, 7062824u, + 510971u, 32764u, 1862u, + 94u, 4u, 0u +}; + +/* + * Generate a random value with a Gaussian distribution centered on 0. + * The RNG must be ready for extraction (already flipped). + * + * Distribution has standard deviation 1.17*sqrt(q/(2*N)). The + * precomputed table is for N = 1024. Since the sum of two independent + * values of standard deviation sigma has standard deviation + * sigma*sqrt(2), then we can just generate more values and add them + * together for lower dimensions. + */ +static int +mkgauss(RNG_CONTEXT *rng, unsigned logn) { + unsigned u, g; + int val; + + g = 1U << (10 - logn); + val = 0; + for (u = 0; u < g; u ++) { + /* + * Each iteration generates one value with the + * Gaussian distribution for N = 1024. + * + * We use two random 64-bit values. First value + * decides on whether the generated value is 0, and, + * if not, the sign of the value. Second random 64-bit + * word is used to generate the non-zero value.
+ * + * For constant-time code we have to read the complete + * table. This has negligible cost, compared with the + * remainder of the keygen process (solving the NTRU + * equation). + */ + uint64_t r; + uint32_t f, v, k, neg; + + /* + * First value: + * - flag 'neg' is randomly selected to be 0 or 1. + * - flag 'f' is set to 1 if the generated value is zero, + * or set to 0 otherwise. + */ + r = get_rng_u64(rng); + neg = (uint32_t)(r >> 63); + r &= ~((uint64_t)1 << 63); + f = (uint32_t)((r - gauss_1024_12289[0]) >> 63); + + /* + * We produce a new random 63-bit integer r, and go over + * the array, starting at index 1. We store in v the + * index of the first array element which is not greater + * than r, unless the flag f was already 1. + */ + v = 0; + r = get_rng_u64(rng); + r &= ~((uint64_t)1 << 63); + for (k = 1; k < (uint32_t)((sizeof gauss_1024_12289) + / (sizeof gauss_1024_12289[0])); k ++) { + uint32_t t; + + t = (uint32_t)((r - gauss_1024_12289[k]) >> 63) ^ 1; + v |= k & -(t & (f ^ 1)); + f |= t; + } + + /* + * We apply the sign ('neg' flag). If the value is zero, + * the sign has no effect. + */ + v = (v ^ -neg) + neg; + + /* + * The generated value (v, reinterpreted as a signed 32-bit + * integer) is added to val. + */ + val += *(int32_t *)&v; + } + return val; +} + +/* + * The MAX_BL_SMALL[] and MAX_BL_LARGE[] contain the lengths, in 31-bit + * words, of intermediate values in the computation: + * + * MAX_BL_SMALL[depth]: length for the input f and g at that depth + * MAX_BL_LARGE[depth]: length for the unreduced F and G at that depth + * + * Rules: + * + * - Within an array, values grow. + * + * - The 'SMALL' array must have an entry for maximum depth, corresponding + * to the size of values used in the binary GCD. There is no such value + * for the 'LARGE' array (the binary GCD yields already reduced + * coefficients). + * + * - MAX_BL_LARGE[depth] >= MAX_BL_SMALL[depth + 1]. + * + * - Values must be large enough to handle the common cases, with some + * margins.
 *
 *  - Values must not be "too large" either because we will convert some
 *    integers into floating-point values by considering the top 10 words,
 *    i.e. 310 bits; hence, for values of length more than 10 words, we
 *    should take care to have the length centered on the expected size.
 *
 * The following average lengths, in bits, have been measured on thousands
 * of random keys (fg = max length of the absolute value of coefficients
 * of f and g at that depth; FG = idem for the unreduced F and G; for the
 * maximum depth, F and G are the output of binary GCD, multiplied by q;
 * for each value, the average and standard deviation are provided).
 *
 * Binary case:
 *    depth: 10    fg: 6307.52 (24.48)    FG: 6319.66 (24.51)
 *    depth:  9    fg: 3138.35 (12.25)    FG: 9403.29 (27.55)
 *    depth:  8    fg: 1576.87 ( 7.49)    FG: 4703.30 (14.77)
 *    depth:  7    fg:  794.17 ( 4.98)    FG: 2361.84 ( 9.31)
 *    depth:  6    fg:  400.67 ( 3.10)    FG: 1188.68 ( 6.04)
 *    depth:  5    fg:  202.22 ( 1.87)    FG:  599.81 ( 3.87)
 *    depth:  4    fg:  101.62 ( 1.02)    FG:  303.49 ( 2.38)
 *    depth:  3    fg:   50.37 ( 0.53)    FG:  153.65 ( 1.39)
 *    depth:  2    fg:   24.07 ( 0.25)    FG:   78.20 ( 0.73)
 *    depth:  1    fg:   10.99 ( 0.08)    FG:   39.82 ( 0.41)
 *    depth:  0    fg:    4.00 ( 0.00)    FG:   19.61 ( 0.49)
 *
 * Integers are actually represented either in binary notation over
 * 31-bit words (signed, using two's complement), or in RNS, modulo
 * many small primes. These small primes are close to, but slightly
 * lower than, 2^31. Use of RNS loses less than two bits, even for
 * the largest values.
 *
 * IMPORTANT: if these values are modified, then the temporary buffer
 * sizes (FALCON_KEYGEN_TEMP_*, in inner.h) must be recomputed
 * accordingly.
+ */ + +static const size_t MAX_BL_SMALL[] = { + 1, 1, 2, 2, 4, 7, 14, 27, 53, 106, 209 +}; + +static const size_t MAX_BL_LARGE[] = { + 2, 2, 5, 7, 12, 21, 40, 78, 157, 308 +}; + +/* + * Average and standard deviation for the maximum size (in bits) of + * coefficients of (f,g), depending on depth. These values are used + * to compute bounds for Babai's reduction. + */ +static const struct { + int avg; + int std; +} BITLENGTH[] = { + { 4, 0 }, + { 11, 1 }, + { 24, 1 }, + { 50, 1 }, + { 102, 1 }, + { 202, 2 }, + { 401, 4 }, + { 794, 5 }, + { 1577, 8 }, + { 3138, 13 }, + { 6308, 25 } +}; + +/* + * Minimal recursion depth at which we rebuild intermediate values + * when reconstructing f and g. + */ +#define DEPTH_INT_FG 4 + +/* + * Compute squared norm of a short vector. Returned value is saturated to + * 2^32-1 if it is not lower than 2^31. + */ +static uint32_t +poly_small_sqnorm(const int8_t *f, unsigned logn) { + size_t n, u; + uint32_t s, ng; + + n = MKN(logn); + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = f[u]; + s += (uint32_t)(z * z); + ng |= s; + } + return s | -(ng >> 31); +} + +/* + * Align (upwards) the provided 'data' pointer with regards to 'base' + * so that the offset is a multiple of the size of 'fpr'. + */ +static fpr * +align_fpr(void *base, void *data) { + uint8_t *cb, *cd; + size_t k, km; + + cb = base; + cd = data; + k = (size_t)(cd - cb); + km = k % sizeof(fpr); + if (km) { + k += (sizeof(fpr)) - km; + } + return (fpr *)(cb + k); +} + +/* + * Align (upwards) the provided 'data' pointer with regards to 'base' + * so that the offset is a multiple of the size of 'uint32_t'. + */ +static uint32_t * +align_u32(void *base, void *data) { + uint8_t *cb, *cd; + size_t k, km; + + cb = base; + cd = data; + k = (size_t)(cd - cb); + km = k % sizeof(uint32_t); + if (km) { + k += (sizeof(uint32_t)) - km; + } + return (uint32_t *)(cb + k); +} + +/* + * Convert a small vector to floating point. 
+ */ +static void +poly_small_to_fp(fpr *x, const int8_t *f, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + x[u] = fpr_of(f[u]); + } +} + +/* + * Input: f,g of degree N = 2^logn; 'depth' is used only to get their + * individual length. + * + * Output: f',g' of degree N/2, with the length for 'depth+1'. + * + * Values are in RNS; input and/or output may also be in NTT. + */ +static void +make_fg_step(uint32_t *data, unsigned logn, unsigned depth, + int in_ntt, int out_ntt) { + size_t n, hn, u; + size_t slen, tlen; + uint32_t *fd, *gd, *fs, *gs, *gm, *igm, *t1; + const small_prime *primes; + + n = (size_t)1 << logn; + hn = n >> 1; + slen = MAX_BL_SMALL[depth]; + tlen = MAX_BL_SMALL[depth + 1]; + primes = PRIMES; + + /* + * Prepare room for the result. + */ + fd = data; + gd = fd + hn * tlen; + fs = gd + hn * tlen; + gs = fs + n * slen; + gm = gs + n * slen; + igm = gm + n; + t1 = igm + n; + memmove(fs, data, 2 * n * slen * sizeof * data); + + /* + * First slen words: we use the input values directly, and apply + * inverse NTT as we go. 
+ */ + for (u = 0; u < slen; u ++) { + uint32_t p, p0i, R2; + size_t v; + uint32_t *x; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + for (v = 0, x = fs + u; v < n; v ++, x += slen) { + t1[v] = *x; + } + if (!in_ntt) { + modp_NTT2(t1, gm, logn, p, p0i); + } + for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + if (in_ntt) { + modp_iNTT2_ext(fs + u, slen, igm, logn, p, p0i); + } + + for (v = 0, x = gs + u; v < n; v ++, x += slen) { + t1[v] = *x; + } + if (!in_ntt) { + modp_NTT2(t1, gm, logn, p, p0i); + } + for (v = 0, x = gd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + if (in_ntt) { + modp_iNTT2_ext(gs + u, slen, igm, logn, p, p0i); + } + + if (!out_ntt) { + modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i); + modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i); + } + } + + /* + * Since the fs and gs words have been de-NTTized, we can use the + * CRT to rebuild the values. + */ + zint_rebuild_CRT(fs, slen, slen, n, primes, 1, gm); + zint_rebuild_CRT(gs, slen, slen, n, primes, 1, gm); + + /* + * Remaining words: use modular reductions to extract the values. 
+ */ + for (u = slen; u < tlen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *x; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)slen, p, p0i, R2); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + for (v = 0, x = fs; v < n; v ++, x += slen) { + t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + for (v = 0, x = gs; v < n; v ++, x += slen) { + t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, x = gd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + + if (!out_ntt) { + modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i); + modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i); + } + } +} + +/* + * Compute f and g at a specific depth, in RNS notation. + * + * Returned values are stored in the data[] array, at slen words per integer. + * + * Conditions: + * 0 <= depth <= logn + * + * Space use in data[]: enough room for any two successive values (f', g', + * f and g). 
+ */ +static void +make_fg(uint32_t *data, const int8_t *f, const int8_t *g, + unsigned logn, unsigned depth, int out_ntt) { + size_t n, u; + uint32_t *ft, *gt, p0; + unsigned d; + const small_prime *primes; + + n = MKN(logn); + ft = data; + gt = ft + n; + primes = PRIMES; + p0 = primes[0].p; + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p0); + gt[u] = modp_set(g[u], p0); + } + + if (depth == 0 && out_ntt) { + uint32_t *gm, *igm; + uint32_t p, p0i; + + p = primes[0].p; + p0i = modp_ninv31(p); + gm = gt + n; + igm = gm + MKN(logn); + modp_mkgm2(gm, igm, logn, primes[0].g, p, p0i); + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + return; + } + + if (depth == 0) { + return; + } + if (depth == 1) { + make_fg_step(data, logn, 0, 0, out_ntt); + return; + } + make_fg_step(data, logn, 0, 0, 1); + for (d = 1; d + 1 < depth; d ++) { + make_fg_step(data, logn - d, d, 1, 1); + } + make_fg_step(data, logn - depth + 1, depth - 1, 1, out_ntt); +} + +/* + * Solving the NTRU equation, deepest level: compute the resultants of + * f and g with X^N+1, and use binary GCD. The F and G values are + * returned in tmp[]. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_deepest(unsigned logn_top, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t len; + uint32_t *Fp, *Gp, *fp, *gp, *t1, q; + const small_prime *primes; + + len = MAX_BL_SMALL[logn_top]; + primes = PRIMES; + + Fp = tmp; + Gp = Fp + len; + fp = Gp + len; + gp = fp + len; + t1 = gp + len; + + make_fg(fp, f, g, logn_top, logn_top, 0); + + /* + * We use the CRT to rebuild the resultants as big integers. + * There are two such big integers. The resultants are always + * nonnegative. + */ + zint_rebuild_CRT(fp, len, len, 2, primes, 0, t1); + + /* + * Apply the binary GCD. The zint_bezout() function works only + * if both inputs are odd. 
+ * + * We can test on the result and return 0 because that would + * imply failure of the NTRU solving equation, and the (f,g) + * values will be abandoned in that case. + */ + if (!zint_bezout(Gp, Fp, fp, gp, len, t1)) { + return 0; + } + + /* + * Multiply the two values by the target value q. Values must + * fit in the destination arrays. + * We can again test on the returned words: a non-zero output + * of zint_mul_small() means that we exceeded our array + * capacity, and that implies failure and rejection of (f,g). + */ + q = 12289; + if (zint_mul_small(Fp, len, q) != 0 + || zint_mul_small(Gp, len, q) != 0) { + return 0; + } + + return 1; +} + +/* + * Solving the NTRU equation, intermediate level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * This function MAY be invoked for the top-level (in which case depth = 0). + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_intermediate(unsigned logn_top, + const int8_t *f, const int8_t *g, unsigned depth, uint32_t *tmp) { + /* + * In this function, 'logn' is the log2 of the degree for + * this step. If N = 2^logn, then: + * - the F and G values already in fk->tmp (from the deeper + * levels) have degree N/2; + * - this function should return F and G of degree N. 
+ */ + unsigned logn; + size_t n, hn, slen, dlen, llen, rlen, FGlen, u; + uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1; + fpr *rt1, *rt2, *rt3, *rt4, *rt5; + int scale_fg, minbl_fg, maxbl_fg, maxbl_FG, scale_k; + uint32_t *x, *y; + int32_t *k; + const small_prime *primes; + + logn = logn_top - depth; + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * slen = size for our input f and g; also size of the reduced + * F and G we return (degree N) + * + * dlen = size of the F and G obtained from the deeper level + * (degree N/2 or N/3) + * + * llen = size for intermediary F and G before reduction (degree N) + * + * We build our non-reduced F and G as two independent halves each, + * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1). + */ + slen = MAX_BL_SMALL[depth]; + dlen = MAX_BL_SMALL[depth + 1]; + llen = MAX_BL_LARGE[depth]; + primes = PRIMES; + + /* + * Fd and Gd are the F and G from the deeper level. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + + /* + * Compute the input f and g for this level. Note that we get f + * and g in RNS + NTT representation. + */ + ft = Gd + dlen * hn; + make_fg(ft, f, g, logn_top, depth, 1); + + /* + * Move the newly computed f and g to make room for our candidate + * F and G (unreduced). + */ + Ft = tmp; + Gt = Ft + n * llen; + t1 = Gt + n * llen; + memmove(t1, ft, 2 * n * slen * sizeof * ft); + ft = t1; + gt = ft + slen * n; + t1 = gt + slen * n; + + /* + * Move Fd and Gd _after_ f and g. + */ + memmove(t1, Fd, 2 * hn * dlen * sizeof * Fd); + Fd = t1; + Gd = Fd + hn * dlen; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt (only n/2 values in each). 
+ */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * We do not need Fd and Gd after that point. + */ + + /* + * Compute our F and G modulo sufficiently many small primes. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + size_t v; + + /* + * All computations are done modulo p. + */ + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * If we processed slen words, then f and g have been + * de-NTTized, and are in RNS; we can rebuild them. + */ + if (u == slen) { + zint_rebuild_CRT(ft, slen, slen, n, primes, 1, t1); + zint_rebuild_CRT(gt, slen, slen, n, primes, 1, t1); + } + + gm = t1; + igm = gm + n; + fx = igm + n; + gx = fx + n; + + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + if (u < slen) { + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + fx[v] = *x; + gx[v] = *y; + } + modp_iNTT2_ext(ft + u, slen, igm, logn, p, p0i); + modp_iNTT2_ext(gt + u, slen, igm, logn, p, p0i); + } else { + uint32_t Rx; + + Rx = modp_Rx((unsigned)slen, p, p0i, R2); + for (v = 0, x = ft, y = gt; + v < n; v ++, x += slen, y += slen) { + fx[v] = zint_mod_small_signed(x, slen, + p, p0i, R2, Rx); + gx[v] = zint_mod_small_signed(y, slen, + p, p0i, R2, Rx); + } + modp_NTT2(fx, gm, logn, p, p0i); + modp_NTT2(gx, gm, logn, p, p0i); + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed in + * a previous step, and stored in Ft and Gt. 
+ */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * General case: + * + * we divide degree by d = 2 or 3 + * f'(x^d) = N(f)(x^d) = f * adj(f) + * g'(x^d) = N(g)(x^d) = g * adj(g) + * f'*G' - g'*F' = q + * F = F'(x^d) * adj(g) + * G = G'(x^d) * adj(f) + * + * We compute things in the NTT. We group roots of phi + * such that all roots x in a group share the same x^d. + * If the roots in a group are x_1, x_2... x_d, then: + * + * N(f)(x_1^d) = f(x_1)*f(x_2)*...*f(x_d) + * + * Thus, we have: + * + * G(x_1) = f(x_2)*f(x_3)*...*f(x_d)*G'(x_1^d) + * G(x_2) = f(x_1)*f(x_3)*...*f(x_d)*G'(x_1^d) + * ... + * G(x_d) = f(x_1)*f(x_2)*...*f(x_{d-1})*G'(x_1^d) + * + * In all cases, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, in our chosen NTT representation, roots + * from the same group are consecutive in RAM. + */ + for (v = 0, x = Ft + u, y = Gt + u; v < hn; + v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + } + + /* + * Rebuild F and G with the CRT. + */ + zint_rebuild_CRT(Ft, llen, llen, n, primes, 1, t1); + zint_rebuild_CRT(Gt, llen, llen, n, primes, 1, t1); + + /* + * At that point, Ft, Gt, ft and gt are consecutive in RAM (in that + * order). 
+ */ + + /* + * Apply Babai reduction to bring back F and G to size slen. + * + * We use the FFT to compute successive approximations of the + * reduction coefficient. We first isolate the top bits of + * the coefficients of f and g, and convert them to floating + * point; with the FFT, we compute adj(f), adj(g), and + * 1/(f*adj(f)+g*adj(g)). + * + * Then, we repeatedly apply the following: + * + * - Get the top bits of the coefficients of F and G into + * floating point, and use the FFT to compute: + * (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) + * + * - Convert back that value into normal representation, and + * round it to the nearest integers, yielding a polynomial k. + * Proper scaling is applied to f, g, F and G so that the + * coefficients fit on 32 bits (signed). + * + * - Subtract k*f from F and k*g from G. + * + * Under normal conditions, this process reduces the size of F + * and G by some bits at each iteration. For constant-time + * operation, we do not want to measure the actual length of + * F and G; instead, we do the following: + * + * - f and g are converted to floating-point, with some scaling + * if necessary to keep values in the representable range. + * + * - For each iteration, we _assume_ a maximum size for F and G, + * and use the values at that size. If we overreach, then + * we get zeros, which is harmless: the resulting coefficients + * of k will be 0 and the value won't be reduced. + * + * - We conservatively assume that F and G will be reduced by + * at least 25 bits at each iteration. + * + * Even when reaching the bottom of the reduction, reduction + * coefficient will remain low. If it goes out-of-range, then + * something wrong occurred and the whole NTRU solving fails. + */ + + /* + * Memory layout: + * - We need to compute and keep adj(f), adj(g), and + * 1/(f*adj(f)+g*adj(g)) (sizes N, N and N/2 fp numbers, + * respectively). + * - At each iteration we need two extra fp buffer (N fp values), + * and produce a k (N 32-bit words). 
k will be shared with one + * of the fp buffers. + * - To compute k*f and k*g efficiently (with the NTT), we need + * some extra room; we reuse the space of the temporary buffers. + * + * Arrays of 'fpr' are obtained from the temporary array itself. + * We ensure that the base is at a properly aligned offset (the + * source array tmp[] is supposed to be already aligned). + */ + + rt3 = align_fpr(tmp, t1); + rt4 = rt3 + n; + rt5 = rt4 + n; + rt1 = rt5 + (n >> 1); + k = (int32_t *)align_u32(tmp, rt1); + rt2 = align_fpr(tmp, k + n); + if (rt2 < (rt1 + n)) { + rt2 = rt1 + n; + } + t1 = (uint32_t *)k + n; + + /* + * Get f and g into rt3 and rt4 as floating-point approximations. + * + * We need to "scale down" the floating-point representation of + * coefficients when they are too big. We want to keep the value + * below 2^310 or so. Thus, when values are larger than 10 words, + * we consider only the top 10 words. Array lengths have been + * computed so that average maximum length will fall in the + * middle or the upper half of these top 10 words. + */ + rlen = slen; + if (rlen > 10) { + rlen = 10; + } + poly_big_to_fp(rt3, ft + slen - rlen, rlen, slen, logn); + poly_big_to_fp(rt4, gt + slen - rlen, rlen, slen, logn); + + /* + * Values in rt3 and rt4 are downscaled by 2^(scale_fg). + */ + scale_fg = 31 * (int)(slen - rlen); + + /* + * Estimated boundaries for the maximum size (in bits) of the + * coefficients of (f,g). We use the measured average, and + * allow for a deviation of at most six times the standard + * deviation. + */ + minbl_fg = BITLENGTH[depth].avg - 6 * BITLENGTH[depth].std; + maxbl_fg = BITLENGTH[depth].avg + 6 * BITLENGTH[depth].std; + + /* + * Compute 1/(f*adj(f)+g*adj(g)) in rt5. We also keep adj(f) + * and adj(g) in rt3 and rt4, respectively. 
+ */ + PQCLEAN_FALCON1024_AVX2_FFT(rt3, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt4, logn); + PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(rt5, rt3, rt4, logn); + PQCLEAN_FALCON1024_AVX2_poly_adj_fft(rt3, logn); + PQCLEAN_FALCON1024_AVX2_poly_adj_fft(rt4, logn); + + /* + * Reduce F and G repeatedly. + * + * The expected maximum bit length of coefficients of F and G + * is kept in maxbl_FG, with the corresponding word length in + * FGlen. + */ + FGlen = llen; + maxbl_FG = 31 * (int)llen; + + /* + * Each reduction operation computes the reduction polynomial + * "k". We need that polynomial to have coefficients that fit + * on 32-bit signed integers, with some scaling; thus, we use + * a descending sequence of scaling values, down to zero. + * + * The size of the coefficients of k is (roughly) the difference + * between the size of the coefficients of (F,G) and the size + * of the coefficients of (f,g). Thus, the maximum size of the + * coefficients of k is, at the start, maxbl_FG - minbl_fg; + * this is our starting scale value for k. + * + * We need to estimate the size of (F,G) during the execution of + * the algorithm; we are allowed some overestimation but not too + * much (poly_big_to_fp() uses a 310-bit window). Generally + * speaking, after applying a reduction with k scaled to + * scale_k, the size of (F,G) will be size(f,g) + scale_k + dd, + * where 'dd' is a few bits to account for the fact that the + * reduction is never perfect (intuitively, dd is on the order + * of sqrt(N), so at most 5 bits; we here allow for 10 extra + * bits). + * + * The size of (f,g) is not known exactly, but maxbl_fg is an + * upper bound. + */ + scale_k = maxbl_FG - minbl_fg; + + for (;;) { + int scale_FG, dc, new_maxbl_FG; + uint32_t scl, sch; + fpr pdc, pt; + + /* + * Convert current F and G into floating-point. We apply + * scaling if the current length is more than 10 words. 
+ */ + rlen = FGlen; + if (rlen > 10) { + rlen = 10; + } + scale_FG = 31 * (int)(FGlen - rlen); + poly_big_to_fp(rt1, Ft + FGlen - rlen, rlen, llen, logn); + poly_big_to_fp(rt2, Gt + FGlen - rlen, rlen, llen, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) in rt2. + */ + PQCLEAN_FALCON1024_AVX2_FFT(rt1, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt2, logn); + PQCLEAN_FALCON1024_AVX2_poly_mul_fft(rt1, rt3, logn); + PQCLEAN_FALCON1024_AVX2_poly_mul_fft(rt2, rt4, logn); + PQCLEAN_FALCON1024_AVX2_poly_add(rt2, rt1, logn); + PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(rt2, rt5, logn); + PQCLEAN_FALCON1024_AVX2_iFFT(rt2, logn); + + /* + * (f,g) are scaled by 'scale_fg', meaning that the + * numbers in rt3/rt4 should be multiplied by 2^(scale_fg) + * to have their true mathematical value. + * + * (F,G) are similarly scaled by 'scale_FG'. Therefore, + * the value we computed in rt2 is scaled by + * 'scale_FG-scale_fg'. + * + * We want that value to be scaled by 'scale_k', hence we + * apply a corrective scaling. After scaling, the values + * should fit in -2^31-1..+2^31-1. + */ + dc = scale_k - scale_FG + scale_fg; + + /* + * We will need to multiply values by 2^(-dc). The value + * 'dc' is not secret, so we can compute 2^(-dc) with a + * non-constant-time process. + * (We could use ldexp(), but we prefer to avoid any + * dependency on libm. When using FP emulation, we could + * use our fpr_ldexp(), which is constant-time.) + */ + if (dc < 0) { + dc = -dc; + pt = fpr_two; + } else { + pt = fpr_onehalf; + } + pdc = fpr_one; + while (dc != 0) { + if ((dc & 1) != 0) { + pdc = fpr_mul(pdc, pt); + } + dc >>= 1; + pt = fpr_sqr(pt); + } + + for (u = 0; u < n; u ++) { + fpr xv; + + xv = fpr_mul(rt2[u], pdc); + + /* + * Sometimes the values can be out-of-bounds if + * the algorithm fails; we must not call + * fpr_rint() (and cast to int32_t) if the value + * is not in-bounds. 
Note that the test does not + * break constant-time discipline, since any + * failure here implies that we discard the current + * secret key (f,g). + */ + if (!fpr_lt(fpr_mtwo31m1, xv) + || !fpr_lt(xv, fpr_ptwo31m1)) { + return 0; + } + k[u] = (int32_t)fpr_rint(xv); + } + + /* + * Values in k[] are integers. They really are scaled + * down by maxbl_FG - minbl_fg bits. + * + * If we are at low depth, then we use the NTT to + * compute k*f and k*g. + */ + sch = (uint32_t)(scale_k / 31); + scl = (uint32_t)(scale_k % 31); + if (depth <= DEPTH_INT_FG) { + poly_sub_scaled_ntt(Ft, FGlen, llen, ft, slen, slen, + k, sch, scl, logn, t1); + poly_sub_scaled_ntt(Gt, FGlen, llen, gt, slen, slen, + k, sch, scl, logn, t1); + } else { + poly_sub_scaled(Ft, FGlen, llen, ft, slen, slen, + k, sch, scl, logn); + poly_sub_scaled(Gt, FGlen, llen, gt, slen, slen, + k, sch, scl, logn); + } + + /* + * We compute the new maximum size of (F,G), assuming that + * (f,g) has _maximal_ length (i.e. that reduction is + * "late" instead of "early". We also adjust FGlen + * accordingly. + */ + new_maxbl_FG = scale_k + maxbl_fg + 10; + if (new_maxbl_FG < maxbl_FG) { + maxbl_FG = new_maxbl_FG; + if ((int)FGlen * 31 >= maxbl_FG + 31) { + FGlen --; + } + } + + /* + * We suppose that scaling down achieves a reduction by + * at least 25 bits per iteration. We stop when we have + * done the loop with an unscaled k. + */ + if (scale_k <= 0) { + break; + } + scale_k -= 25; + if (scale_k < 0) { + scale_k = 0; + } + } + + /* + * If (F,G) length was lowered below 'slen', then we must take + * care to re-extend the sign. + */ + if (FGlen < slen) { + for (u = 0; u < n; u ++, Ft += llen, Gt += llen) { + size_t v; + uint32_t sw; + + sw = -(Ft[FGlen - 1] >> 30) >> 1; + for (v = FGlen; v < slen; v ++) { + Ft[v] = sw; + } + sw = -(Gt[FGlen - 1] >> 30) >> 1; + for (v = FGlen; v < slen; v ++) { + Gt[v] = sw; + } + } + } + + /* + * Compress encoding of all values to 'slen' words (this is the + * expected output format). 
+ */ + for (u = 0, x = tmp, y = tmp; + u < (n << 1); u ++, x += slen, y += llen) { + memmove(x, y, slen * sizeof * y); + } + return 1; +} + +/* + * Solving the NTRU equation, binary case, depth = 1. Upon entry, the + * F and G from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_binary_depth1(unsigned logn_top, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + /* + * The first half of this function is a copy of the corresponding + * part in solve_NTRU_intermediate(), for the reconstruction of + * the unreduced F and G. The second half (Babai reduction) is + * done differently, because the unreduced F and G fit in 53 bits + * of precision, allowing a much simpler process with lower RAM + * usage. + */ + unsigned depth, logn; + size_t n_top, n, hn, slen, dlen, llen, u; + uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1; + fpr *rt1, *rt2, *rt3, *rt4, *rt5, *rt6; + uint32_t *x, *y; + + depth = 1; + n_top = (size_t)1 << logn_top; + logn = logn_top - depth; + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + */ + + /* + * slen = size for our input f and g; also size of the reduced + * F and G we return (degree N) + * + * dlen = size of the F and G obtained from the deeper level + * (degree N/2) + * + * llen = size for intermediary F and G before reduction (degree N) + * + * We build our non-reduced F and G as two independent halves each, + * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1). + */ + slen = MAX_BL_SMALL[depth]; + dlen = MAX_BL_SMALL[depth + 1]; + llen = MAX_BL_LARGE[depth]; + + /* + * Fd and Gd are the F and G from the deeper level. 
Ft and Gt + * are the destination arrays for the unreduced F and G. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + Ft = Gd + dlen * hn; + Gt = Ft + llen * n; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * Now Fd and Gd are not needed anymore; we can squeeze them out. + */ + memmove(tmp, Ft, llen * n * sizeof(uint32_t)); + Ft = tmp; + memmove(Ft + llen * n, Gt, llen * n * sizeof(uint32_t)); + Gt = Ft + llen * n; + ft = Gt + llen * n; + gt = ft + slen * n; + + t1 = gt + slen * n; + + /* + * Compute our F and G modulo sufficiently many small primes. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + unsigned e; + size_t v; + + /* + * All computations are done modulo p. + */ + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * We recompute things from the source f and g, of full + * degree. However, we will need only the n first elements + * of the inverse NTT table (igm); the call to modp_mkgm() + * below will fill n_top elements in igm[] (thus overflowing + * into fx[]) but later code will overwrite these extra + * elements. + */ + gm = t1; + igm = gm + n_top; + fx = igm + n; + gx = fx + n_top; + modp_mkgm2(gm, igm, logn_top, PRIMES[u].g, p, p0i); + + /* + * Set ft and gt to f and g modulo p, respectively. + */ + for (v = 0; v < n_top; v ++) { + fx[v] = modp_set(f[v], p); + gx[v] = modp_set(g[v], p); + } + + /* + * Convert to NTT and compute our f and g. 
+ */ + modp_NTT2(fx, gm, logn_top, p, p0i); + modp_NTT2(gx, gm, logn_top, p, p0i); + for (e = logn_top; e > logn; e --) { + modp_poly_rec_res(fx, e, p, p0i, R2); + modp_poly_rec_res(gx, e, p, p0i, R2); + } + + /* + * From that point onward, we only need tables for + * degree n, so we can save some space. + */ + if (depth > 0) { /* always true */ + memmove(gm + n, igm, n * sizeof * igm); + igm = gm + n; + memmove(igm + n, fx, n * sizeof * ft); + fx = igm + n; + memmove(fx + n, gx, n * sizeof * gt); + gx = fx + n; + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed + * in a previous step, and stored in Ft and Gt. + */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * Equations are: + * + * f'(x^2) = N(f)(x^2) = f * adj(f) + * g'(x^2) = N(g)(x^2) = g * adj(g) + * + * f'*G' - g'*F' = q + * + * F = F'(x^2) * adj(g) + * G = G'(x^2) * adj(f) + * + * The NTT representation of f is f(w) for all w which + * are roots of phi. In the binary case, as well as in + * the ternary case for all depth except the deepest, + * these roots can be grouped in pairs (w,-w), and we + * then have: + * + * f(w) = adj(f)(-w) + * f(-w) = adj(f)(w) + * + * and w^2 is then a root for phi at the half-degree. + * + * At the deepest level in the ternary case, this still + * holds, in the following sense: the roots of x^2-x+1 + * are (w,-w^2) (for w^3 = -1, and w != -1), and we + * have: + * + * f(w) = adj(f)(-w^2) + * f(-w^2) = adj(f)(w) + * + * In all case, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, the two roots for each pair are consecutive + * in our bit-reversal encoding. 
+ */ + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + + /* + * Also save ft and gt (only up to size slen). + */ + if (u < slen) { + modp_iNTT2(fx, igm, logn, p, p0i); + modp_iNTT2(gx, igm, logn, p, p0i); + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + *x = fx[v]; + *y = gx[v]; + } + } + } + + /* + * Rebuild f, g, F and G with the CRT. Note that the elements of F + * and G are consecutive, and thus can be rebuilt in a single + * loop; similarly, the elements of f and g are consecutive. + */ + zint_rebuild_CRT(Ft, llen, llen, n << 1, PRIMES, 1, t1); + zint_rebuild_CRT(ft, slen, slen, n << 1, PRIMES, 1, t1); + + /* + * Here starts the Babai reduction, specialized for depth = 1. + * + * Candidates F and G (from Ft and Gt), and base f and g (ft and gt), + * are converted to floating point. There is no scaling, and a + * single pass is sufficient. + */ + + /* + * Convert F and G into floating point (rt1 and rt2). + */ + rt1 = align_fpr(tmp, gt + slen * n); + rt2 = rt1 + n; + poly_big_to_fp(rt1, Ft, llen, llen, logn); + poly_big_to_fp(rt2, Gt, llen, llen, logn); + + /* + * Integer representation of F and G is no longer needed, we + * can remove it. 
+ */ + memmove(tmp, ft, 2 * slen * n * sizeof * ft); + ft = tmp; + gt = ft + slen * n; + rt3 = align_fpr(tmp, gt + slen * n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * Convert f and g into floating point (rt3 and rt4). + */ + poly_big_to_fp(rt3, ft, slen, slen, logn); + poly_big_to_fp(rt4, gt, slen, slen, logn); + + /* + * Remove unneeded ft and gt. + */ + memmove(tmp, rt1, 4 * n * sizeof * rt1); + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * We now have: + * rt1 = F + * rt2 = G + * rt3 = f + * rt4 = g + * in that order in RAM. We convert all of them to FFT. + */ + PQCLEAN_FALCON1024_AVX2_FFT(rt1, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt2, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt3, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt4, logn); + + /* + * Compute: + * rt5 = F*adj(f) + G*adj(g) + * rt6 = 1 / (f*adj(f) + g*adj(g)) + * (Note that rt6 is half-length.) + */ + rt5 = rt4 + n; + rt6 = rt5 + n; + PQCLEAN_FALCON1024_AVX2_poly_add_muladj_fft(rt5, rt1, rt2, rt3, rt4, logn); + PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(rt6, rt3, rt4, logn); + + /* + * Compute: + * rt5 = (F*adj(f)+G*adj(g)) / (f*adj(f)+g*adj(g)) + */ + PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(rt5, rt6, logn); + + /* + * Compute k as the rounded version of rt5. Check that none of + * the values is larger than 2^63-1 (in absolute value) + * because that would make the fpr_rint() do something undefined; + * note that any out-of-bounds value here implies a failure and + * (f,g) will be discarded, so we can make a simple test. + */ + PQCLEAN_FALCON1024_AVX2_iFFT(rt5, logn); + for (u = 0; u < n; u ++) { + fpr z; + + z = rt5[u]; + if (!fpr_lt(z, fpr_ptwo63m1) || !fpr_lt(fpr_mtwo63m1, z)) { + return 0; + } + rt5[u] = fpr_of(fpr_rint(z)); + } + PQCLEAN_FALCON1024_AVX2_FFT(rt5, logn); + + /* + * Subtract k*f from F, and k*g from G. 
+ */ + PQCLEAN_FALCON1024_AVX2_poly_mul_fft(rt3, rt5, logn); + PQCLEAN_FALCON1024_AVX2_poly_mul_fft(rt4, rt5, logn); + PQCLEAN_FALCON1024_AVX2_poly_sub(rt1, rt3, logn); + PQCLEAN_FALCON1024_AVX2_poly_sub(rt2, rt4, logn); + PQCLEAN_FALCON1024_AVX2_iFFT(rt1, logn); + PQCLEAN_FALCON1024_AVX2_iFFT(rt2, logn); + + /* + * Convert back F and G to integers, and return. + */ + Ft = tmp; + Gt = Ft + n; + rt3 = align_fpr(tmp, Gt + n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + for (u = 0; u < n; u ++) { + Ft[u] = (uint32_t)fpr_rint(rt1[u]); + Gt[u] = (uint32_t)fpr_rint(rt2[u]); + } + + return 1; +} + +/* + * Solving the NTRU equation, top level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_binary_depth0(unsigned logn, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t n, hn, u; + uint32_t p, p0i, R2; + uint32_t *Fp, *Gp, *t1, *t2, *t3, *t4, *t5; + uint32_t *gm, *igm, *ft, *gt; + fpr *rt2, *rt3; + + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + * + * Everything should fit in 31-bit integers, hence we can just use + * the first small prime p = 2147473409. + */ + p = PRIMES[0].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + Fp = tmp; + Gp = Fp + hn; + ft = Gp + hn; + gt = ft + n; + gm = gt + n; + igm = gm + n; + + modp_mkgm2(gm, igm, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F' anf G' in NTT representation. 
+ */ + for (u = 0; u < hn; u ++) { + Fp[u] = modp_set(zint_one_to_plain(Fp + u), p); + Gp[u] = modp_set(zint_one_to_plain(Gp + u), p); + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Load f and g and convert them to NTT representation. + */ + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + + /* + * Build the unreduced F,G in ft and gt. + */ + for (u = 0; u < n; u += 2) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = ft[u + 0]; + ftB = ft[u + 1]; + gtA = gt[u + 0]; + gtB = gt[u + 1]; + mFp = modp_montymul(Fp[u >> 1], R2, p, p0i); + mGp = modp_montymul(Gp[u >> 1], R2, p, p0i); + ft[u + 0] = modp_montymul(gtB, mFp, p, p0i); + ft[u + 1] = modp_montymul(gtA, mFp, p, p0i); + gt[u + 0] = modp_montymul(ftB, mGp, p, p0i); + gt[u + 1] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2(ft, igm, logn, p, p0i); + modp_iNTT2(gt, igm, logn, p, p0i); + + Gp = Fp + n; + t1 = Gp + n; + memmove(Fp, ft, 2 * n * sizeof * ft); + + /* + * We now need to apply the Babai reduction. At that point, + * we have F and G in two n-word arrays. + * + * We can compute F*adj(f)+G*adj(g) and f*adj(f)+g*adj(g) + * modulo p, using the NTT. We still move memory around in + * order to save RAM. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + + /* + * Compute the NTT tables in t1 and t2. We do not keep t2 + * (we'll recompute it later on). + */ + modp_mkgm2(t1, t2, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F and G to NTT. + */ + modp_NTT2(Fp, t1, logn, p, p0i); + modp_NTT2(Gp, t1, logn, p, p0i); + + /* + * Load f and adj(f) in t4 and t5, and convert them to NTT + * representation. 
+ */ + t4[0] = t5[0] = modp_set(f[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[n - u] = modp_set(-f[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Compute F*adj(f) in t2, and f*adj(f) in t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_montymul(w, Fp[u], p, p0i); + t3[u] = modp_montymul(w, t4[u], p, p0i); + } + + /* + * Load g and adj(g) in t4 and t5, and convert them to NTT + * representation. + */ + t4[0] = t5[0] = modp_set(g[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(g[u], p); + t5[n - u] = modp_set(-g[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Add G*adj(g) to t2, and g*adj(g) to t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_add(t2[u], + modp_montymul(w, Gp[u], p, p0i), p); + t3[u] = modp_add(t3[u], + modp_montymul(w, t4[u], p, p0i), p); + } + + /* + * Convert back t2 and t3 to normal representation (normalized + * around 0), and then + * move them to t1 and t2. We first need to recompute the + * inverse table for NTT. + */ + modp_mkgm2(t1, t4, logn, PRIMES[0].g, p, p0i); + modp_iNTT2(t2, t4, logn, p, p0i); + modp_iNTT2(t3, t4, logn, p, p0i); + for (u = 0; u < n; u ++) { + t1[u] = (uint32_t)modp_norm(t2[u], p); + t2[u] = (uint32_t)modp_norm(t3[u], p); + } + + /* + * At that point, array contents are: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * F*adj(f)+G*adj(g) (t1) + * f*adj(f)+g*adj(g) (t2) + * + * We want to divide t1 by t2. The result is not integral; it + * must be rounded. We thus need to use the FFT. + */ + + /* + * Get f*adj(f)+g*adj(g) in FFT representation. Since this + * polynomial is auto-adjoint, all its coordinates in FFT + * representation are actually real, so we can truncate off + * the imaginary parts. 
+ */ + rt3 = align_fpr(tmp, t3); + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t2)[u]); + } + PQCLEAN_FALCON1024_AVX2_FFT(rt3, logn); + rt2 = align_fpr(tmp, t2); + memmove(rt2, rt3, hn * sizeof * rt3); + + /* + * Convert F*adj(f)+G*adj(g) in FFT representation. + */ + rt3 = rt2 + hn; + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t1)[u]); + } + PQCLEAN_FALCON1024_AVX2_FFT(rt3, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) and get + * its rounded normal representation in t1. + */ + PQCLEAN_FALCON1024_AVX2_poly_div_autoadj_fft(rt3, rt2, logn); + PQCLEAN_FALCON1024_AVX2_iFFT(rt3, logn); + for (u = 0; u < n; u ++) { + t1[u] = modp_set((int32_t)fpr_rint(rt3[u]), p); + } + + /* + * RAM contents are now: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * k (t1) + * + * We want to compute F-k*f, and G-k*g. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + modp_mkgm2(t2, t3, logn, PRIMES[0].g, p, p0i); + for (u = 0; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[u] = modp_set(g[u], p); + } + modp_NTT2(t1, t2, logn, p, p0i); + modp_NTT2(t4, t2, logn, p, p0i); + modp_NTT2(t5, t2, logn, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t kw; + + kw = modp_montymul(t1[u], R2, p, p0i); + Fp[u] = modp_sub(Fp[u], + modp_montymul(kw, t4[u], p, p0i), p); + Gp[u] = modp_sub(Gp[u], + modp_montymul(kw, t5[u], p, p0i), p); + } + modp_iNTT2(Fp, t3, logn, p, p0i); + modp_iNTT2(Gp, t3, logn, p, p0i); + for (u = 0; u < n; u ++) { + Fp[u] = (uint32_t)modp_norm(Fp[u], p); + Gp[u] = (uint32_t)modp_norm(Gp[u], p); + } + + return 1; +} + +/* + * Solve the NTRU equation. Returned value is 1 on success, 0 on error. + * G can be NULL, in which case that value is computed but not returned. + * If any of the coefficients of F and G exceeds lim (in absolute value), + * then 0 is returned. 
+ */ +static int +solve_NTRU(unsigned logn, int8_t *F, int8_t *G, + const int8_t *f, const int8_t *g, int lim, uint32_t *tmp) { + size_t n, u; + uint32_t *ft, *gt, *Ft, *Gt, *gm; + uint32_t p, p0i, r; + const small_prime *primes; + + n = MKN(logn); + + if (!solve_NTRU_deepest(logn, f, g, tmp)) { + return 0; + } + + /* + * For logn <= 2, we need to use solve_NTRU_intermediate() + * directly, because coefficients are a bit too large and + * do not fit the hypotheses in solve_NTRU_binary_depth0(). + */ + if (logn <= 2) { + unsigned depth; + + depth = logn; + while (depth -- > 0) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + } else { + unsigned depth; + + depth = logn; + while (depth -- > 2) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + if (!solve_NTRU_binary_depth1(logn, f, g, tmp)) { + return 0; + } + if (!solve_NTRU_binary_depth0(logn, f, g, tmp)) { + return 0; + } + } + + /* + * If no buffer has been provided for G, use a temporary one at tmp + 2 * n. + */ + if (G == NULL) { + G = (int8_t *)(tmp + 2 * n); + } + + /* + * Final F and G are in tmp[] (F at tmp, G at tmp + n), one word per coefficient + * (signed value over 31 bits). + */ + if (!poly_big_to_small(F, tmp, lim, logn) + || !poly_big_to_small(G, tmp + n, lim, logn)) { + return 0; + } + + /* + * Verify that the NTRU equation is fulfilled. Since all elements + * have short lengths, verifying modulo a small prime p works, and + * allows using the NTT. + * + * We put Gt[] first in tmp[], and process it first, so that it does + * not overlap with G[] in case we allocated it ourselves. 
+ */ + Gt = tmp; + ft = Gt + n; + gt = ft + n; + Ft = gt + n; + gm = Ft + n; + + primes = PRIMES; + p = primes[0].p; + p0i = modp_ninv31(p); + modp_mkgm2(gm, tmp, logn, primes[0].g, p, p0i); + for (u = 0; u < n; u ++) { + Gt[u] = modp_set(G[u], p); + } + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + Ft[u] = modp_set(F[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + modp_NTT2(Ft, gm, logn, p, p0i); + modp_NTT2(Gt, gm, logn, p, p0i); + r = modp_montymul(12289, 1, p, p0i); /* 12289 = q, the right-hand side of fG - gF = q */ + for (u = 0; u < n; u ++) { + uint32_t z; + + z = modp_sub(modp_montymul(ft[u], Gt[u], p, p0i), + modp_montymul(gt[u], Ft[u], p, p0i), p); + if (z != r) { + return 0; + } + } + + return 1; +} + +/* + * Generate a random polynomial with a Gaussian distribution. This function + * also makes sure that the resultant of the polynomial with phi is odd. + */ +static void +poly_small_mkgauss(RNG_CONTEXT *rng, int8_t *f, unsigned logn) { + size_t n, u; + unsigned mod2; + + n = MKN(logn); + mod2 = 0; + for (u = 0; u < n; u ++) { + int s; + +restart: + s = mkgauss(rng, logn); + + /* + * We need the coefficient to fit within -127..+127; + * realistically, this is always the case except for + * the very low degrees (N = 2 or 4), for which there + * is no real security anyway. + */ + if (s < -127 || s > 127) { + goto restart; + } + + /* + * We need the sum of all coefficients to be odd; otherwise, + * the resultant of the polynomial with X^N+1 will be even, + * and the binary GCD will fail. + */ + if (u == n - 1) { /* last coefficient: resample until the total parity is odd */ + if ((mod2 ^ (unsigned)(s & 1)) == 0) { + goto restart; + } + } else { + mod2 ^= (unsigned)(s & 1); /* running parity of the coefficients so far */ + } + f[u] = (int8_t)s; + } +} + +/* see falcon.h */ +void +PQCLEAN_FALCON1024_AVX2_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp) { + /* + * Algorithm is the following: + * + * - Generate f and g with the Gaussian distribution. 
+ * + * - If either Res(f,phi) or Res(g,phi) is even, try again. + * + * - If ||(f,g)|| is too large, try again. + * + * - If ||B~_{f,g}|| is too large, try again. + * + * - If f is not invertible mod phi mod q, try again. + * + * - Compute h = g/f mod phi mod q. + * + * - Solve the NTRU equation fG - gF = q; if the solving fails, + * try again. Usual failure condition is when Res(f,phi) + * and Res(g,phi) are not prime to each other. + */ + size_t n, u; + uint16_t *h2, *tmp2; + RNG_CONTEXT *rc; + + n = MKN(logn); + rc = rng; + + /* + * We need to generate f and g randomly, until we find values + * such that the norm of (g,-f), and of the orthogonalized + * vector, are satisfying. The orthogonalized vector is: + * (q*adj(f)/(f*adj(f)+g*adj(g)), q*adj(g)/(f*adj(f)+g*adj(g))) + * (it is actually the (N+1)-th row of the Gram-Schmidt basis). + * + * In the binary case, coefficients of f and g are generated + * independently of each other, with a discrete Gaussian + * distribution of standard deviation 1.17*sqrt(q/(2*N)). Then, + * the two vectors have expected norm 1.17*sqrt(q), which is + * also our acceptance bound: we require both vectors to be no + * larger than that (this will be satisfied about 1/4th of the + * time, thus we expect sampling new (f,g) about 4 times for that + * step). + * + * We require that Res(f,phi) and Res(g,phi) are both odd (the + * NTRU equation solver requires it). + */ + for (;;) { + fpr *rt1, *rt2, *rt3; + fpr bnorm; + uint32_t normf, normg, norm; + int lim; + + /* + * The poly_small_mkgauss() function makes sure + * that the sum of coefficients is 1 modulo 2 + * (i.e. the resultant of the polynomial with phi + * will be odd). + */ + poly_small_mkgauss(rc, f, logn); + poly_small_mkgauss(rc, g, logn); + + /* + * Verify that all coefficients are within the bounds + * defined in max_fg_bits. This is the case with + * overwhelming probability; this guarantees that the + * key will be encodable with FALCON_COMP_TRIM. 
+ */ + lim = 1 << (PQCLEAN_FALCON1024_AVX2_max_fg_bits[logn] - 1); + for (u = 0; u < n; u ++) { + /* + * We can use non-CT tests since on any failure + * we will discard f and g. + */ + if (f[u] >= lim || f[u] <= -lim + || g[u] >= lim || g[u] <= -lim) { + lim = -1; + break; + } + } + if (lim < 0) { + continue; + } + + /* + * Bound is 1.17*sqrt(q). We compute the squared + * norms. With q = 12289, the squared bound is: + * (1.17^2)* 12289 = 16822.4121 + * Since f and g are integral, the squared norm + * of (g,-f) is an integer. + */ + normf = poly_small_sqnorm(f, logn); + normg = poly_small_sqnorm(g, logn); + norm = (normf + normg) | -((normf | normg) >> 31); + if (norm >= 16823) { + continue; + } + + /* + * We compute the orthogonalized vector norm. + */ + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + poly_small_to_fp(rt1, f, logn); + poly_small_to_fp(rt2, g, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt1, logn); + PQCLEAN_FALCON1024_AVX2_FFT(rt2, logn); + PQCLEAN_FALCON1024_AVX2_poly_invnorm2_fft(rt3, rt1, rt2, logn); + PQCLEAN_FALCON1024_AVX2_poly_adj_fft(rt1, logn); + PQCLEAN_FALCON1024_AVX2_poly_adj_fft(rt2, logn); + PQCLEAN_FALCON1024_AVX2_poly_mulconst(rt1, fpr_q, logn); + PQCLEAN_FALCON1024_AVX2_poly_mulconst(rt2, fpr_q, logn); + PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(rt1, rt3, logn); + PQCLEAN_FALCON1024_AVX2_poly_mul_autoadj_fft(rt2, rt3, logn); + PQCLEAN_FALCON1024_AVX2_iFFT(rt1, logn); + PQCLEAN_FALCON1024_AVX2_iFFT(rt2, logn); + bnorm = fpr_zero; + for (u = 0; u < n; u ++) { + bnorm = fpr_add(bnorm, fpr_sqr(rt1[u])); + bnorm = fpr_add(bnorm, fpr_sqr(rt2[u])); + } + if (!fpr_lt(bnorm, fpr_bnorm_max)) { + continue; + } + + /* + * Compute public key h = g/f mod X^N+1 mod q. If this + * fails, we must restart. 
+ */ + if (h == NULL) { + h2 = (uint16_t *)tmp; + tmp2 = h2 + n; + } else { + h2 = h; + tmp2 = (uint16_t *)tmp; + } + if (!PQCLEAN_FALCON1024_AVX2_compute_public(h2, f, g, logn, (uint8_t *)tmp2)) { + continue; + } + + /* + * Solve the NTRU equation to get F and G. + */ + lim = (1 << (PQCLEAN_FALCON1024_AVX2_max_FG_bits[logn] - 1)) - 1; + if (!solve_NTRU(logn, F, G, f, g, lim, (uint32_t *)tmp)) { + continue; + } + + /* + * Key pair is generated. + */ + break; + } +} diff --git a/crypto_sign/falcon/falcon-1024/avx2/pqclean.c b/crypto_sign/falcon/falcon-1024/avx2/pqclean.c new file mode 100644 index 00000000..1b254cfc --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/pqclean.c @@ -0,0 +1,386 @@ +#include "api.h" +#include "inner.h" +#include "randombytes.h" +#include +#include +/* + * Wrapper for implementing the PQClean API. + */ + + + +#define NONCELEN 40 +#define SEEDLEN 48 + +/* + * Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) + * + * private key: + * header byte: 0101nnnn + * private f (6 or 5 bits by element, depending on degree) + * private g (6 or 5 bits by element, depending on degree) + * private F (8 bits by element) + * + * public key: + * header byte: 0000nnnn + * public h (14 bits by element) + * + * signature: + * header byte: 0011nnnn + * nonce 40 bytes + * value (12 bits by element) + * + * message + signature: + * signature length (2 bytes, big-endian) + * nonce 40 bytes + * message + * header byte: 0010nnnn + * value (12 bits by element) + * (signature length is 1+len(value), not counting the nonce) + */ + +/* see api.h */ +int +PQCLEAN_FALCON1024_AVX2_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + union { + uint8_t b[28 * 1024]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[1024], g[1024], F[1024], G[1024]; + uint16_t h[1024]; + unsigned char seed[SEEDLEN]; + inner_shake256_context rng; + size_t u, v; + + + /* + * Generate key pair. 
+ */ + randombytes(seed, sizeof seed); + inner_shake256_init(&rng); + inner_shake256_inject(&rng, seed, sizeof seed); + inner_shake256_flip(&rng); + PQCLEAN_FALCON1024_AVX2_keygen(&rng, f, g, F, G, h, 10, tmp.b); + inner_shake256_ctx_release(&rng); + + /* + * Encode private key. + */ + sk[0] = 0x50 + 10; + u = 1; + v = PQCLEAN_FALCON1024_AVX2_trim_i8_encode( + sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u, + f, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_AVX2_trim_i8_encode( + sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u, + g, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_AVX2_trim_i8_encode( + sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u, + F, 10, PQCLEAN_FALCON1024_AVX2_max_FG_bits[10]); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES) { + return -1; + } + + /* + * Encode public key. + */ + pk[0] = 0x00 + 10; + v = PQCLEAN_FALCON1024_AVX2_modq_encode( + pk + 1, PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1, + h, 10); + if (v != PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + + return 0; +} + +/* + * Compute the signature. nonce[] receives the nonce and must have length + * NONCELEN bytes. sigbuf[] receives the signature value (without nonce + * or header byte), with *sigbuflen providing the maximum value length and + * receiving the actual value length. + * + * If a signature could be computed but not encoded because it would + * exceed the output buffer size, then a new signature is computed. If + * the provided buffer size is too low, this could loop indefinitely, so + * the caller must provide a size that can accommodate signatures with a + * large enough probability. + * + * Return value: 0 on success, -1 on error. 
+ */ +static int +do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + union { + uint8_t b[72 * 1024]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[1024], g[1024], F[1024], G[1024]; + union { + int16_t sig[1024]; + uint16_t hm[1024]; + } r; + unsigned char seed[SEEDLEN]; + inner_shake256_context sc; + size_t u, v; + + /* + * Decode the private key. + */ + if (sk[0] != 0x50 + 10) { + return -1; + } + u = 1; + v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode( + f, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10], + sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode( + g, 10, PQCLEAN_FALCON1024_AVX2_max_fg_bits[10], + sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_AVX2_trim_i8_decode( + F, 10, PQCLEAN_FALCON1024_AVX2_max_FG_bits[10], + sk + u, PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON1024_AVX2_CRYPTO_SECRETKEYBYTES) { + return -1; + } + if (!PQCLEAN_FALCON1024_AVX2_complete_private(G, f, g, F, 10, tmp.b)) { + return -1; + } + + + /* + * Create a random nonce (40 bytes). + */ + randombytes(nonce, NONCELEN); + + /* + * Hash message nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON1024_AVX2_hash_to_point_vartime(&sc, r.hm, 10); + inner_shake256_ctx_release(&sc); + + /* + * Initialize a RNG. + */ + randombytes(seed, sizeof seed); + inner_shake256_init(&sc); + inner_shake256_inject(&sc, seed, sizeof seed); + inner_shake256_flip(&sc); + + /* + * Compute and return the signature. This loops until a signature + * value is found that fits in the provided buffer. 
+ */ + for (;;) { + PQCLEAN_FALCON1024_AVX2_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b); + v = PQCLEAN_FALCON1024_AVX2_comp_encode(sigbuf, *sigbuflen, r.sig, 10); + if (v != 0) { + inner_shake256_ctx_release(&sc); + *sigbuflen = v; + return 0; + } + } +} + +/* + * Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] + * (of size sigbuflen) contains the signature value, not including the + * header byte or nonce. Return value is 0 on success, -1 on error. + */ +static int +do_verify( + const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + union { + uint8_t b[2 * 1024]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + uint16_t h[1024], hm[1024]; + int16_t sig[1024]; + inner_shake256_context sc; + + /* + * Decode public key. + */ + if (pk[0] != 0x00 + 10) { + return -1; + } + if (PQCLEAN_FALCON1024_AVX2_modq_decode(h, 10, + pk + 1, PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) + != PQCLEAN_FALCON1024_AVX2_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + PQCLEAN_FALCON1024_AVX2_to_ntt_monty(h, 10); + + /* + * Decode signature. + */ + if (sigbuflen == 0) { + return -1; + } + if (PQCLEAN_FALCON1024_AVX2_comp_decode(sig, 10, sigbuf, sigbuflen) != sigbuflen) { + return -1; + } + + /* + * Hash nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON1024_AVX2_hash_to_point_ct(&sc, hm, 10, tmp.b); + inner_shake256_ctx_release(&sc); + + /* + * Verify signature. 
+ */ + if (!PQCLEAN_FALCON1024_AVX2_verify_raw(hm, sig, h, 10, tmp.b)) { + return -1; + } + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + /* + * The PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES constant is used for + * the signed message object (as produced by PQCLEAN_FALCON1024_AVX2_crypto_sign()) + * and includes a two-byte length value, so we take care here + * to only generate signatures that are two bytes shorter than + * the maximum. This is done to ensure that PQCLEAN_FALCON1024_AVX2_crypto_sign() + * and PQCLEAN_FALCON1024_AVX2_crypto_sign_signature() produce the exact same signature + * value, if used on the same message, with the same private key, + * and using the same output from randombytes() (this is for + * reproducibility of tests). + */ + size_t vlen; + + vlen = PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { + return -1; + } + sig[0] = 0x30 + 10; + *siglen = 1 + NONCELEN + vlen; + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen < 1 + NONCELEN) { + return -1; + } + if (sig[0] != 0x30 + 10) { + return -1; + } + return do_verify(sig + 1, + sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + uint8_t *pm, *sigbuf; + size_t sigbuflen; + + /* + * Move the message to its final location; this is a memmove() so + * it handles overlaps properly. 
+ */ + memmove(sm + 2 + NONCELEN, m, mlen); + pm = sm + 2 + NONCELEN; + sigbuf = pm + 1 + mlen; + sigbuflen = PQCLEAN_FALCON1024_AVX2_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { + return -1; + } + pm[mlen] = 0x20 + 10; + sigbuflen ++; + sm[0] = (uint8_t)(sigbuflen >> 8); + sm[1] = (uint8_t)sigbuflen; + *smlen = mlen + 2 + NONCELEN + sigbuflen; + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + const uint8_t *sigbuf; + size_t pmlen, sigbuflen; + + if (smlen < 3 + NONCELEN) { + return -1; + } + sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; + if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { + return -1; + } + sigbuflen --; + pmlen = smlen - NONCELEN - 3 - sigbuflen; + if (sm[2 + NONCELEN + pmlen] != 0x20 + 10) { + return -1; + } + sigbuf = sm + 2 + NONCELEN + pmlen + 1; + + /* + * The 2-byte length header and the one-byte signature header + * have been verified. Nonce is at sm+2, followed by the message + * itself. Message length is in pmlen. sigbuf/sigbuflen point to + * the signature value (excluding the header byte). + */ + if (do_verify(sm + 2, sigbuf, sigbuflen, + sm + 2 + NONCELEN, pmlen, pk) < 0) { + return -1; + } + + /* + * Signature is correct, we just have to copy/move the message + * to its final destination. The memmove() properly handles + * overlaps. + */ + memmove(m, sm + 2 + NONCELEN, pmlen); + *mlen = pmlen; + return 0; +} diff --git a/crypto_sign/falcon/falcon-1024/avx2/rng.c b/crypto_sign/falcon/falcon-1024/avx2/rng.c new file mode 100644 index 00000000..74207c52 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/rng.c @@ -0,0 +1,195 @@ +#include "inner.h" +#include +/* + * PRNG and interface to the system RNG. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + + +/* + * Include relevant system header files. For Win32, this will also need + * linking with advapi32.dll, which we trigger with an appropriate #pragma. + */ + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_get_seed(void *seed, size_t len) { + (void)seed; + if (len == 0) { + return 1; + } + return 0; +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_prng_init(prng *p, inner_shake256_context *src) { + inner_shake256_extract(src, p->state.d, 56); + PQCLEAN_FALCON1024_AVX2_prng_refill(p); +} + +/* + * PRNG based on ChaCha20. + * + * State consists in key (32 bytes) then IV (16 bytes) and block counter + * (8 bytes). 
Normally, we should not care about local endianness (this
+ * is for a PRNG), but for the NIST competition we need reproducible KAT
+ * vectors that work across architectures, so we enforce little-endian
+ * interpretation where applicable. Moreover, output words are "spread
+ * out" over the output buffer with the interleaving pattern that is
+ * naturally obtained from the AVX2 implementation that runs eight
+ * ChaCha20 instances in parallel.
+ *
+ * The block counter is XORed into the first 8 bytes of the IV.
+ *
+ * Each call produces 16 * 32 = 512 bytes into p->buf.d (eight ChaCha20
+ * blocks computed in parallel, one per 32-bit lane), advances the
+ * 64-bit block counter stored at p->state.d + 48 by 8, and resets
+ * p->ptr to 0.
+ */
+void
+PQCLEAN_FALCON1024_AVX2_prng_refill(prng *p) {
+
+    /* The four ChaCha constant words ("expand 32-byte k"). */
+    static const uint32_t CW[] = {
+        0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+    };
+
+    uint64_t cc;
+    size_t u;
+    int i;
+    uint32_t *sw;
+    union {
+        uint32_t w[16];
+        __m256i y[2];   /* for alignment */
+    } t;
+    __m256i state[16], init[16];
+
+    sw = (uint32_t *)p->state.d;
+
+    /*
+     * XOR next counter values into state.
+     *
+     * Lane u (0..7) uses counter value cc + u: t.w[0..7] receive the
+     * low 32 bits and t.w[8..15] the high 32 bits of each counter.
+     */
+    cc = *(uint64_t *)(p->state.d + 48);
+    for (u = 0; u < 8; u ++) {
+        t.w[u] = (uint32_t)(cc + u);
+        t.w[u + 8] = (uint32_t)((cc + u) >> 32);
+    }
+    *(uint64_t *)(p->state.d + 48) = cc + 8;
+
+    /*
+     * Load state.
+     *
+     * Words 0..3 are the constants, words 4..13 come from the first ten
+     * state words (broadcast to all lanes), and words 14..15 are state
+     * words 10..11 XORed with the per-lane counter halves.
+     */
+    for (u = 0; u < 4; u ++) {
+        state[u] = init[u] =
+                       _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)CW[u]));
+    }
+    for (u = 0; u < 10; u ++) {
+        state[u + 4] = init[u + 4] =
+                           _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[u]));
+    }
+    state[14] = init[14] = _mm256_xor_si256(
+                               _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[10])),
+                               _mm256_loadu_si256((__m256i *)&t.w[0]));
+    state[15] = init[15] = _mm256_xor_si256(
+                               _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[11])),
+                               _mm256_loadu_si256((__m256i *)&t.w[8]));
+
+    /*
+     * Do all rounds.
+     * (10 iterations, each made of four "column" and four "diagonal"
+     * quarter-rounds; 32-bit rotations by 16, 12, 8 and 7 are built
+     * from a left shift ORed with the complementary right shift.)
+     */
+    for (i = 0; i < 10; i ++) {
+
+#define QROUND(a, b, c, d) do { \
+        state[a] = _mm256_add_epi32(state[a], state[b]); \
+        state[d] = _mm256_xor_si256(state[d], state[a]); \
+        state[d] = _mm256_or_si256( \
+                                    _mm256_slli_epi32(state[d], 16), \
+                                    _mm256_srli_epi32(state[d], 16)); \
+        state[c] = _mm256_add_epi32(state[c], state[d]); \
+        state[b] = _mm256_xor_si256(state[b], state[c]); \
+        state[b] = _mm256_or_si256( \
+                                    _mm256_slli_epi32(state[b], 12), \
+                                    _mm256_srli_epi32(state[b], 20)); \
+        state[a] = _mm256_add_epi32(state[a], state[b]); \
+        state[d] = _mm256_xor_si256(state[d], state[a]); \
+        state[d] = _mm256_or_si256( \
+                                    _mm256_slli_epi32(state[d], 8), \
+                                    _mm256_srli_epi32(state[d], 24)); \
+        state[c] = _mm256_add_epi32(state[c], state[d]); \
+        state[b] = _mm256_xor_si256(state[b], state[c]); \
+        state[b] = _mm256_or_si256( \
+                                    _mm256_slli_epi32(state[b], 7), \
+                                    _mm256_srli_epi32(state[b], 25)); \
+    } while (0)
+
+        QROUND( 0, 4, 8, 12);
+        QROUND( 1, 5, 9, 13);
+        QROUND( 2, 6, 10, 14);
+        QROUND( 3, 7, 11, 15);
+        QROUND( 0, 5, 10, 15);
+        QROUND( 1, 6, 11, 12);
+        QROUND( 2, 7, 8, 13);
+        QROUND( 3, 4, 9, 14);
+
+#undef QROUND
+
+    }
+
+    /*
+     * Add initial state back and encode the result in the destination
+     * buffer. We can dump the AVX2 values "as is" because the non-AVX2
+     * code uses a compatible order of values.
+     * (Word u of all eight lanes lands at byte offset 32 * u.)
+     */
+    for (u = 0; u < 16; u ++) {
+        _mm256_storeu_si256((__m256i *)&p->buf.d[u << 5],
+                            _mm256_add_epi32(state[u], init[u]));
+    }
+
+
+    p->ptr = 0;
+}
+
+/*
+ * see inner.h
+ *
+ * Copy 'len' pseudorandom bytes into 'dst'. Bytes are taken from the
+ * output buffer starting at the current read position p->ptr; the
+ * position is advanced by the amount copied and the buffer is refilled
+ * each time it is exhausted. A request with len == 0 does nothing.
+ */
+void
+PQCLEAN_FALCON1024_AVX2_prng_get_bytes(prng *p, void *dst, size_t len) {
+    uint8_t *buf;
+
+    buf = dst;
+    while (len > 0) {
+        size_t clen;
+
+        /* Bytes still unread in the buffer, capped to the request. */
+        clen = (sizeof p->buf.d) - p->ptr;
+        if (clen > len) {
+            clen = len;
+        }
+        /*
+         * Read from the current position, not from the start of the
+         * buffer: copying from p->buf.d alone would hand out bytes
+         * that were already consumed whenever p->ptr != 0.
+         */
+        memcpy(buf, p->buf.d + p->ptr, clen);
+        buf += clen;
+        len -= clen;
+        p->ptr += clen;
+        if (p->ptr == sizeof p->buf.d) {
+            PQCLEAN_FALCON1024_AVX2_prng_refill(p);
+        }
+    }
+}
diff --git a/crypto_sign/falcon/falcon-1024/avx2/sign.c b/crypto_sign/falcon/falcon-1024/avx2/sign.c
new file mode 100644
index 00000000..8ef93bf8
--- /dev/null
+++ b/crypto_sign/falcon/falcon-1024/avx2/sign.c
@@ -0,0 +1,1312 @@
+#include "inner.h"
+
+/*
+ * Falcon signature generation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2017-2019 Falcon Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author Thomas Pornin
+ */
+
+
+/* =================================================================== */
+
+/*
+ * Compute degree N from logarithm 'logn'.
+ */
+#define MKN(logn) ((size_t)1 << (logn))
+
+/* =================================================================== */
+/*
+ * Binary case:
+ * N = 2^logn
+ * phi = X^N+1
+ */
+
+/*
+ * Get the size of the LDL tree for an input with polynomials of size
+ * 2^logn. The size is expressed in the number of elements.
+ */
+static inline unsigned
+ffLDL_treesize(unsigned logn) {
+    /*
+     * For logn = 0 (polynomials are constant), the "tree" is a
+     * single element. Otherwise, the tree node has size 2^logn, and
+     * has two child trees for size logn-1 each. Thus, treesize s()
+     * must fulfill these two relations:
+     *
+     * s(0) = 1
+     * s(logn) = (2^logn) + 2*s(logn-1)
+     *
+     * whose closed form is s(logn) = (logn + 1) * 2^logn.
+     */
+    return (logn + 1) << logn;
+}
+
+/*
+ * Inner function for ffLDL_fft(). It expects the matrix to be both
+ * auto-adjoint and quasicyclic; also, it uses the source operands
+ * as modifiable temporaries.
+ *
+ * tmp[] must have room for at least one polynomial.
+ *
+ * Tree layout written here: the first n elements of 'tree' receive L,
+ * followed by the left sub-tree (built from the split of d00), then the
+ * right sub-tree (built from the split of d11).
+ */
+static void
+ffLDL_fft_inner(fpr *tree,
+                fpr *g0, fpr *g1, unsigned logn, fpr *tmp) {
+    size_t n, hn;
+
+    n = MKN(logn);
+    if (n == 1) {
+        tree[0] = g0[0];
+        return;
+    }
+    hn = n >> 1;
+
+    /*
+     * The LDL decomposition yields L (which is written in the tree)
+     * and the diagonal of D. Since d00 = g0, we just write d11
+     * into tmp.
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_LDLmv_fft(tmp, tree, g0, g1, g0, logn);
+
+    /*
+     * Split d00 (currently in g0) and d11 (currently in tmp). We
+     * reuse g0 and g1 as temporary storage spaces:
+     * d00 splits into g1, g1+hn
+     * d11 splits into g0, g0+hn
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(g1, g1 + hn, g0, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(g0, g0 + hn, tmp, logn);
+
+    /*
+     * Each split result is the first row of a new auto-adjoint
+     * quasicyclic matrix for the next recursive step.
+     */
+    ffLDL_fft_inner(tree + n,
+                    g1, g1 + hn, logn - 1, tmp);
+    ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1),
+                    g0, g0 + hn, logn - 1, tmp);
+}
+
+/*
+ * Compute the ffLDL tree of an auto-adjoint matrix G. The matrix
+ * is provided as three polynomials (FFT representation).
+ *
+ * The "tree" array is filled with the computed tree, of size
+ * (logn+1)*(2^logn) elements (see ffLDL_treesize()).
+ *
+ * Input arrays MUST NOT overlap, except possibly the three unmodified
+ * arrays g00, g01 and g11. tmp[] should have room for at least three
+ * polynomials of 2^logn elements each.
+ *
+ * Use of tmp[]: the first n elements hold a working copy of d00 (= g00),
+ * the next n elements hold d11, and the remainder is scratch space that
+ * is also handed to ffLDL_fft_inner().
+ */
+static void
+ffLDL_fft(fpr *tree, const fpr *g00,
+          const fpr *g01, const fpr *g11,
+          unsigned logn, fpr *tmp) {
+    size_t n, hn;
+    fpr *d00, *d11;
+
+    n = MKN(logn);
+    if (n == 1) {
+        tree[0] = g00[0];
+        return;
+    }
+    hn = n >> 1;
+    d00 = tmp;
+    d11 = tmp + n;
+    tmp += n << 1;
+
+    memcpy(d00, g00, n * sizeof * g00);
+    PQCLEAN_FALCON1024_AVX2_poly_LDLmv_fft(d11, tree, g00, g01, g11, logn);
+
+    /*
+     * Split d00 into the scratch area, split d11 into the d00 slot,
+     * then move the d00 halves into the d11 slot; the inputs are never
+     * modified.
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(tmp, tmp + hn, d00, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(d00, d00 + hn, d11, logn);
+    memcpy(d11, tmp, n * sizeof * tmp);
+    ffLDL_fft_inner(tree + n,
+                    d11, d11 + hn, logn - 1, tmp);
+    ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1),
+                    d00, d00 + hn, logn - 1, tmp);
+}
+
+/*
+ * Normalize an ffLDL tree: the specification replaces each leaf of
+ * value x with sigma / sqrt(x); this implementation stores the inverse
+ * of that value instead, i.e. sqrt(x) * fpr_inv_sigma (see the comment
+ * in the function body). Only leaves are modified.
+ */
+static void
+ffLDL_binary_normalize(fpr *tree, unsigned logn) {
+    /*
+     * TODO: make an iterative version.
+     */
+    size_t n;
+
+    n = MKN(logn);
+    if (n == 1) {
+        /*
+         * We actually store in the tree leaf the inverse of
+         * the value mandated by the specification: this
+         * saves a division both here and in the sampler.
+         */
+        tree[0] = fpr_mul(fpr_sqrt(tree[0]), fpr_inv_sigma);
+    } else {
+        ffLDL_binary_normalize(tree + n, logn - 1);
+        ffLDL_binary_normalize(tree + n + ffLDL_treesize(logn - 1),
+                               logn - 1);
+    }
+}
+
+/* =================================================================== */
+
+/*
+ * Convert an integer polynomial (with small values) into the
+ * representation with complex numbers.
+ * (Each of the 2^logn coefficients of t[] is converted with fpr_of();
+ * no FFT is applied here.)
+ */
+static void
+smallints_to_fpr(fpr *r, const int8_t *t, unsigned logn) {
+    size_t n, u;
+
+    n = MKN(logn);
+    for (u = 0; u < n; u ++) {
+        r[u] = fpr_of(t[u]);
+    }
+}
+
+/*
+ * The expanded private key contains:
+ * - The B0 matrix (four elements)
+ * - The ffLDL tree
+ *
+ * The skoff_*() functions return the offset (counted in fpr elements)
+ * of each part within the expanded key: b00, b01, b10, b11 (2^logn
+ * elements each, in that order), then the tree.
+ */
+
+static inline size_t
+skoff_b00(unsigned logn) {
+    (void)logn;
+    return 0;
+}
+
+static inline size_t
+skoff_b01(unsigned logn) {
+    return MKN(logn);
+}
+
+static inline size_t
+skoff_b10(unsigned logn) {
+    return 2 * MKN(logn);
+}
+
+static inline size_t
+skoff_b11(unsigned logn) {
+    return 3 * MKN(logn);
+}
+
+static inline size_t
+skoff_tree(unsigned logn) {
+    return 4 * MKN(logn);
+}
+
+/*
+ * see inner.h
+ *
+ * Fill expanded_key[] with the basis B0 = [[g, -f], [G, -F]] in FFT
+ * representation, followed by the normalized ffLDL tree of the Gram
+ * matrix of B0. tmp[] is used as an array of fpr: three polynomials for
+ * g00, g01 and g11, and the area after them both as a scratch
+ * polynomial and as the tmp[] argument of ffLDL_fft().
+ */
+void
+PQCLEAN_FALCON1024_AVX2_expand_privkey(fpr *expanded_key,
+                                       const int8_t *f, const int8_t *g,
+                                       const int8_t *F, const int8_t *G,
+                                       unsigned logn, uint8_t *tmp) {
+    size_t n;
+    fpr *rf, *rg, *rF, *rG;
+    fpr *b00, *b01, *b10, *b11;
+    fpr *g00, *g01, *g11, *gxx;
+    fpr *tree;
+
+    n = MKN(logn);
+    b00 = expanded_key + skoff_b00(logn);
+    b01 = expanded_key + skoff_b01(logn);
+    b10 = expanded_key + skoff_b10(logn);
+    b11 = expanded_key + skoff_b11(logn);
+    tree = expanded_key + skoff_tree(logn);
+
+    /*
+     * We load the private key elements directly into the B0 matrix,
+     * since B0 = [[g, -f], [G, -F]].
+     */
+    rf = b01;
+    rg = b00;
+    rF = b11;
+    rG = b10;
+
+    smallints_to_fpr(rf, f, logn);
+    smallints_to_fpr(rg, g, logn);
+    smallints_to_fpr(rF, F, logn);
+    smallints_to_fpr(rG, G, logn);
+
+    /*
+     * Compute the FFT for the key elements, and negate f and F.
+     */
+    PQCLEAN_FALCON1024_AVX2_FFT(rf, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(rg, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(rF, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(rG, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_neg(rf, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_neg(rF, logn);
+
+    /*
+     * The Gram matrix is G = B·B*. Formulas are:
+     * g00 = b00*adj(b00) + b01*adj(b01)
+     * g01 = b00*adj(b10) + b01*adj(b11)
+     * g10 = b10*adj(b00) + b11*adj(b01)
+     * g11 = b10*adj(b10) + b11*adj(b11)
+     *
+     * For historical reasons, this implementation uses
+     * g00, g01 and g11 (upper triangle).
+     */
+    g00 = (fpr *)tmp;
+    g01 = g00 + n;
+    g11 = g01 + n;
+    gxx = g11 + n;
+
+    /* gxx receives the second product of each sum before it is added. */
+    memcpy(g00, b00, n * sizeof * b00);
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(g00, logn);
+    memcpy(gxx, b01, n * sizeof * b01);
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(gxx, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(g00, gxx, logn);
+
+    memcpy(g01, b00, n * sizeof * b00);
+    PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(g01, b10, logn);
+    memcpy(gxx, b01, n * sizeof * b01);
+    PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(gxx, b11, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(g01, gxx, logn);
+
+    memcpy(g11, b10, n * sizeof * b10);
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(g11, logn);
+    memcpy(gxx, b11, n * sizeof * b11);
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(gxx, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(g11, gxx, logn);
+
+    /*
+     * Compute the Falcon tree.
+     */
+    ffLDL_fft(tree, g00, g01, g11, logn, gxx);
+
+    /*
+     * Normalize tree.
+     */
+    ffLDL_binary_normalize(tree, logn);
+}
+
+/*
+ * Type of the integer sampler used by the sampling functions below; it
+ * is always invoked as samp(ctx, center, leaf), where 'leaf' is a
+ * normalized tree leaf value (the parameter is named 'sigma' here).
+ */
+typedef int (*samplerZ)(void *ctx, fpr mu, fpr sigma);
+
+/*
+ * Perform Fast Fourier Sampling for target vector t. The Gram matrix
+ * is provided (G = [[g00, g01], [adj(g01), g11]]).
The sampled vector
+ * is written over (t0,t1). The Gram matrix is modified as well. The
+ * tmp[] buffer must have room for four polynomials.
+ *
+ * The LDL tree is not stored: each node is computed on the fly from the
+ * Gram matrix and discarded once used ("dynamic tree").
+ */
+static void
+ffSampling_fft_dyntree(samplerZ samp, void *samp_ctx,
+                       fpr *t0, fpr *t1,
+                       fpr *g00, fpr *g01, fpr *g11,
+                       unsigned logn, fpr *tmp) {
+    size_t n, hn;
+    fpr *z0, *z1;
+
+    /*
+     * Deepest level: the LDL tree leaf value is just g00 (the
+     * array has length only 1 at this point); we normalize it
+     * with regards to sigma, then use it for sampling.
+     */
+    if (logn == 0) {
+        fpr leaf;
+
+        leaf = g00[0];
+        leaf = fpr_mul(fpr_sqrt(leaf), fpr_inv_sigma);
+        t0[0] = fpr_of(samp(samp_ctx, t0[0], leaf));
+        t1[0] = fpr_of(samp(samp_ctx, t1[0], leaf));
+        return;
+    }
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+
+    /*
+     * Decompose G into LDL. We only need d00 (identical to g00),
+     * d11, and l10; we do that in place.
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_LDL_fft(g00, g01, g11, logn);
+
+    /*
+     * Split d00 and d11 and expand them into half-size quasi-cyclic
+     * Gram matrices. We also save l10 in tmp[].
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(tmp, tmp + hn, g00, logn);
+    memcpy(g00, tmp, n * sizeof * tmp);
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(tmp, tmp + hn, g11, logn);
+    memcpy(g11, tmp, n * sizeof * tmp);
+    memcpy(tmp, g01, n * sizeof * g01);
+    memcpy(g01, g00, hn * sizeof * g00);
+    memcpy(g01 + hn, g11, hn * sizeof * g00);
+
+    /*
+     * The half-size Gram matrices for the recursive LDL tree
+     * building are now:
+     * - left sub-tree: g00, g00+hn, g01
+     * - right sub-tree: g11, g11+hn, g01+hn
+     * l10 is in tmp[].
+     */
+
+    /*
+     * We split t1 and use the first recursive call on the two
+     * halves, using the right sub-tree. The result is merged
+     * back into tmp + 2*n.
+     */
+    z1 = tmp + n;
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(z1, z1 + hn, t1, logn);
+    ffSampling_fft_dyntree(samp, samp_ctx, z1, z1 + hn,
+                           g11, g11 + hn, g01 + hn, logn - 1, z1 + n);
+    PQCLEAN_FALCON1024_AVX2_poly_merge_fft(tmp + (n << 1), z1, z1 + hn, logn);
+
+    /*
+     * Compute tb0 = t0 + (t1 - z1) * l10.
+     * At that point, l10 is in tmp, t1 is unmodified, and z1 is
+     * in tmp + (n << 1). The buffer in z1 is free.
+     *
+     * In the end, z1 is written over t1, and tb0 is in t0.
+     */
+    memcpy(z1, t1, n * sizeof * t1);
+    PQCLEAN_FALCON1024_AVX2_poly_sub(z1, tmp + (n << 1), logn);
+    memcpy(t1, tmp + (n << 1), n * sizeof * tmp);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(tmp, z1, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(t0, tmp, logn);
+
+    /*
+     * Second recursive invocation, on the split tb0 (currently in t0)
+     * and the left sub-tree.
+     */
+    z0 = tmp;
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(z0, z0 + hn, t0, logn);
+    ffSampling_fft_dyntree(samp, samp_ctx, z0, z0 + hn,
+                           g00, g00 + hn, g01, logn - 1, z0 + n);
+    PQCLEAN_FALCON1024_AVX2_poly_merge_fft(t0, z0, z0 + hn, logn);
+}
+
+/*
+ * Perform Fast Fourier Sampling for target vector t and LDL tree T.
+ * tmp[] must have size for at least two polynomials of size 2^logn.
+ *
+ * The sampled vector is written into (z0, z1); t0, t1 and the tree are
+ * only read. The sampler is always invoked for the z1 half before the
+ * z0 half, and within each leaf pair in a fixed order.
+ */
+static void
+ffSampling_fft(samplerZ samp, void *samp_ctx,
+               fpr *z0, fpr *z1,
+               const fpr *tree,
+               const fpr *t0, const fpr *t1, unsigned logn,
+               fpr *tmp) {
+    size_t n, hn;
+    const fpr *tree0, *tree1;
+
+    /*
+     * When logn == 2, we inline the last two recursion levels.
+     */
+    if (logn == 2) {
+        fpr w0, w1, w2, w3, sigma;
+        __m128d ww0, ww1, wa, wb, wc, wd;
+        __m128d wy0, wy1, wz0, wz1;
+        __m128d half, invsqrt8, invsqrt2, neghi, neglo;
+        int si0, si1, si2, si3;
+
+        /* Sub-trees of the logn == 2 node: left at +4, right at +8. */
+        tree0 = tree + 4;
+        tree1 = tree + 8;
+
+        /*
+         * Constants: 1/2, 1/sqrt(8), 1/sqrt(2), and two sign masks that
+         * flip (through XOR) the sign of the high, respectively low,
+         * double of a vector.
+         */
+        half = _mm_set1_pd(0.5);
+        invsqrt8 = _mm_set1_pd(0.353553390593273762200422181052);
+        invsqrt2 = _mm_set1_pd(0.707106781186547524400844362105);
+        neghi = _mm_set_pd(-0.0, 0.0);
+        neglo = _mm_set_pd(0.0, -0.0);
+
+        /*
+         * We split t1 into w*, then do the recursive invocation,
+         * with output in w*. We finally merge back into z1.
+         */
+        ww0 = _mm_loadu_pd(&t1[0].v);
+        ww1 = _mm_loadu_pd(&t1[2].v);
+        wa = _mm_unpacklo_pd(ww0, ww1);
+        wb = _mm_unpackhi_pd(ww0, ww1);
+        wc = _mm_add_pd(wa, wb);
+        ww0 = _mm_mul_pd(wc, half);
+        wc = _mm_sub_pd(wa, wb);
+        wd = _mm_xor_pd(_mm_permute_pd(wc, 1), neghi);
+        ww1 = _mm_mul_pd(_mm_add_pd(wc, wd), invsqrt8);
+
+        w2.v = _mm_cvtsd_f64(ww1);
+        w3.v = _mm_cvtsd_f64(_mm_permute_pd(ww1, 1));
+        wa = ww1;
+        sigma = tree1[3];
+        si2 = samp(samp_ctx, w2, sigma);
+        si3 = samp(samp_ctx, w3, sigma);
+        ww1 = _mm_set_pd((double)si3, (double)si2);
+        wa = _mm_sub_pd(wa, ww1);
+        wb = _mm_loadu_pd(&tree1[0].v);
+        wc = _mm_mul_pd(wa, wb);
+        wd = _mm_mul_pd(wa, _mm_permute_pd(wb, 1));
+        wa = _mm_unpacklo_pd(wc, wd);
+        wb = _mm_unpackhi_pd(wc, wd);
+        ww0 = _mm_add_pd(ww0, _mm_add_pd(wa, _mm_xor_pd(wb, neglo)));
+        w0.v = _mm_cvtsd_f64(ww0);
+        w1.v = _mm_cvtsd_f64(_mm_permute_pd(ww0, 1));
+        sigma = tree1[2];
+        si0 = samp(samp_ctx, w0, sigma);
+        si1 = samp(samp_ctx, w1, sigma);
+        ww0 = _mm_set_pd((double)si1, (double)si0);
+
+        wc = _mm_mul_pd(
+                 _mm_set_pd((double)(si2 + si3), (double)(si2 - si3)),
+                 invsqrt2);
+        wa = _mm_add_pd(ww0, wc);
+        wb = _mm_sub_pd(ww0, wc);
+        ww0 = _mm_unpacklo_pd(wa, wb);
+        ww1 = _mm_unpackhi_pd(wa, wb);
+        _mm_storeu_pd(&z1[0].v, ww0);
+        _mm_storeu_pd(&z1[2].v, ww1);
+
+        /*
+         * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*.
+         */
+        wy0 = _mm_sub_pd(_mm_loadu_pd(&t1[0].v), ww0);
+        wy1 = _mm_sub_pd(_mm_loadu_pd(&t1[2].v), ww1);
+        wz0 = _mm_loadu_pd(&tree[0].v);
+        wz1 = _mm_loadu_pd(&tree[2].v);
+        ww0 = _mm_sub_pd(_mm_mul_pd(wy0, wz0), _mm_mul_pd(wy1, wz1));
+        ww1 = _mm_add_pd(_mm_mul_pd(wy0, wz1), _mm_mul_pd(wy1, wz0));
+        ww0 = _mm_add_pd(ww0, _mm_loadu_pd(&t0[0].v));
+        ww1 = _mm_add_pd(ww1, _mm_loadu_pd(&t0[2].v));
+
+        /*
+         * Second recursive invocation.
+         * (Same sequence as for z1 above, now on tb0 with the left
+         * sub-tree tree0, and with the result stored into z0.)
+         */
+        wa = _mm_unpacklo_pd(ww0, ww1);
+        wb = _mm_unpackhi_pd(ww0, ww1);
+        wc = _mm_add_pd(wa, wb);
+        ww0 = _mm_mul_pd(wc, half);
+        wc = _mm_sub_pd(wa, wb);
+        wd = _mm_xor_pd(_mm_permute_pd(wc, 1), neghi);
+        ww1 = _mm_mul_pd(_mm_add_pd(wc, wd), invsqrt8);
+
+        w2.v = _mm_cvtsd_f64(ww1);
+        w3.v = _mm_cvtsd_f64(_mm_permute_pd(ww1, 1));
+        wa = ww1;
+        sigma = tree0[3];
+        si2 = samp(samp_ctx, w2, sigma);
+        si3 = samp(samp_ctx, w3, sigma);
+        ww1 = _mm_set_pd((double)si3, (double)si2);
+        wa = _mm_sub_pd(wa, ww1);
+        wb = _mm_loadu_pd(&tree0[0].v);
+        wc = _mm_mul_pd(wa, wb);
+        wd = _mm_mul_pd(wa, _mm_permute_pd(wb, 1));
+        wa = _mm_unpacklo_pd(wc, wd);
+        wb = _mm_unpackhi_pd(wc, wd);
+        ww0 = _mm_add_pd(ww0, _mm_add_pd(wa, _mm_xor_pd(wb, neglo)));
+        w0.v = _mm_cvtsd_f64(ww0);
+        w1.v = _mm_cvtsd_f64(_mm_permute_pd(ww0, 1));
+        sigma = tree0[2];
+        si0 = samp(samp_ctx, w0, sigma);
+        si1 = samp(samp_ctx, w1, sigma);
+        ww0 = _mm_set_pd((double)si1, (double)si0);
+
+        wc = _mm_mul_pd(
+                 _mm_set_pd((double)(si2 + si3), (double)(si2 - si3)),
+                 invsqrt2);
+        wa = _mm_add_pd(ww0, wc);
+        wb = _mm_sub_pd(ww0, wc);
+        ww0 = _mm_unpacklo_pd(wa, wb);
+        ww1 = _mm_unpackhi_pd(wa, wb);
+        _mm_storeu_pd(&z0[0].v, ww0);
+        _mm_storeu_pd(&z0[2].v, ww1);
+
+        return;
+    }
+
+    /*
+     * Case logn == 1 is reachable only when using Falcon-2 (the
+     * smallest size for which Falcon is mathematically defined, but
+     * of course way too insecure to be of any use).
+     */
+    if (logn == 1) {
+        fpr x0, x1, y0, y1, sigma;
+        fpr a_re, a_im, b_re, b_im, c_re, c_im;
+
+        x0 = t1[0];
+        x1 = t1[1];
+        sigma = tree[3];
+        z1[0] = y0 = fpr_of(samp(samp_ctx, x0, sigma));
+        z1[1] = y1 = fpr_of(samp(samp_ctx, x1, sigma));
+        /* c = (t1 - z1) * L, as one complex multiplication. */
+        a_re = fpr_sub(x0, y0);
+        a_im = fpr_sub(x1, y1);
+        b_re = tree[0];
+        b_im = tree[1];
+        c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
+        c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
+        x0 = fpr_add(c_re, t0[0]);
+        x1 = fpr_add(c_im, t0[1]);
+        sigma = tree[2];
+        z0[0] = fpr_of(samp(samp_ctx, x0, sigma));
+        z0[1] = fpr_of(samp(samp_ctx, x1, sigma));
+
+        return;
+    }
+
+    /*
+     * Normal end of recursion is for logn == 0. Since the last
+     * steps of the recursions were inlined in the blocks above
+     * (when logn == 1 or 2), this case is not reachable, and is
+     * retained here only for documentation purposes.
+
+    if (logn == 0) {
+        fpr x0, x1, sigma;
+
+        x0 = t0[0];
+        x1 = t1[0];
+        sigma = tree[0];
+        z0[0] = fpr_of(samp(samp_ctx, x0, sigma));
+        z1[0] = fpr_of(samp(samp_ctx, x1, sigma));
+        return;
+    }
+
+     */
+
+    /*
+     * General recursive case (logn >= 3).
+     */
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    tree0 = tree + n;
+    tree1 = tree + n + ffLDL_treesize(logn - 1);
+
+    /*
+     * We split t1 into z1 (reused as temporary storage), then do
+     * the recursive invocation, with output in tmp. We finally
+     * merge back into z1.
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(z1, z1 + hn, t1, logn);
+    ffSampling_fft(samp, samp_ctx, tmp, tmp + hn,
+                   tree1, z1, z1 + hn, logn - 1, tmp + n);
+    PQCLEAN_FALCON1024_AVX2_poly_merge_fft(z1, tmp, tmp + hn, logn);
+
+    /*
+     * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in tmp[].
+     */
+    memcpy(tmp, t1, n * sizeof * t1);
+    PQCLEAN_FALCON1024_AVX2_poly_sub(tmp, z1, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(tmp, tree, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(tmp, t0, logn);
+
+    /*
+     * Second recursive invocation.
+     */
+    PQCLEAN_FALCON1024_AVX2_poly_split_fft(z0, z0 + hn, tmp, logn);
+    ffSampling_fft(samp, samp_ctx, tmp, tmp + hn,
+                   tree0, z0, z0 + hn, logn - 1, tmp + n);
+    PQCLEAN_FALCON1024_AVX2_poly_merge_fft(z0, tmp, tmp + hn, logn);
+}
+
+/*
+ * Compute a signature: the signature contains two vectors, s1 and s2.
+ * The s1 vector is not returned. The squared norm of (s1,s2) is
+ * computed, and if it is short enough, then s2 is returned into the
+ * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is
+ * returned; the caller should then try again. This function uses an
+ * expanded key.
+ *
+ * tmp[] must have room for at least six polynomials.
+ *
+ * On success, the s1 coefficients (as int16_t) are additionally copied
+ * to the start of tmp[].
+ */
+static int
+do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
+             const fpr *expanded_key,
+             const uint16_t *hm,
+             unsigned logn, fpr *tmp) {
+    size_t n, u;
+    fpr *t0, *t1, *tx, *ty;
+    const fpr *b00, *b01, *b10, *b11, *tree;
+    fpr ni;
+    uint32_t sqn, ng;
+    int16_t *s1tmp, *s2tmp;
+
+    n = MKN(logn);
+    t0 = tmp;
+    t1 = t0 + n;
+    b00 = expanded_key + skoff_b00(logn);
+    b01 = expanded_key + skoff_b01(logn);
+    b10 = expanded_key + skoff_b10(logn);
+    b11 = expanded_key + skoff_b11(logn);
+    tree = expanded_key + skoff_tree(logn);
+
+    /*
+     * Set the target vector to [hm, 0] (hm is the hashed message).
+     */
+    for (u = 0; u < n; u ++) {
+        t0[u] = fpr_of(hm[u]);
+        /* This is implicit.
+        t1[u] = fpr_zero;
+        */
+    }
+
+    /*
+     * Apply the lattice basis to obtain the real target
+     * vector (after normalization with regards to modulus).
+     */
+    PQCLEAN_FALCON1024_AVX2_FFT(t0, logn);
+    ni = fpr_inverse_of_q;
+    memcpy(t1, t0, n * sizeof * t0);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(t1, b01, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mulconst(t1, fpr_neg(ni), logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(t0, b11, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mulconst(t0, ni, logn);
+
+    tx = t1 + n;
+    ty = tx + n;
+
+    /*
+     * Apply sampling. Output is written back in [tx, ty].
+     */
+    ffSampling_fft(samp, samp_ctx, tx, ty, tree, t0, t1, logn, ty + n);
+
+    /*
+     * Get the lattice point corresponding to that tiny vector.
+     */
+    memcpy(t0, tx, n * sizeof * tx);
+    memcpy(t1, ty, n * sizeof * ty);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(tx, b00, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(ty, b10, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(tx, ty, logn);
+    memcpy(ty, t0, n * sizeof * t0);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(ty, b01, logn);
+
+    memcpy(t0, tx, n * sizeof * tx);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(t1, b11, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(t1, ty, logn);
+
+    PQCLEAN_FALCON1024_AVX2_iFFT(t0, logn);
+    PQCLEAN_FALCON1024_AVX2_iFFT(t1, logn);
+
+    /*
+     * Compute the signature.
+     *
+     * sqn accumulates the squared norm of s1 = hm - round(t0). ng ORs
+     * together all partial sums: if any of them had its top bit set,
+     * sqn is forced to 0xFFFFFFFF below.
+     */
+    s1tmp = (int16_t *)tx;
+    sqn = 0;
+    ng = 0;
+    for (u = 0; u < n; u ++) {
+        int32_t z;
+
+        z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
+        sqn += (uint32_t)(z * z);
+        ng |= sqn;
+        s1tmp[u] = (int16_t)z;
+    }
+    sqn |= -(ng >> 31);
+
+    /*
+     * With "normal" degrees (e.g. 512 or 1024), it is very
+     * improbable that the computed vector is not short enough;
+     * however, it may happen in practice for the very reduced
+     * versions (e.g. degree 16 or below). In that case, the caller
+     * will loop, and we must not write anything into s2[] because
+     * s2[] may overlap with the hashed message hm[] and we need
+     * hm[] for the next iteration.
+     */
+    s2tmp = (int16_t *)tmp;
+    for (u = 0; u < n; u ++) {
+        s2tmp[u] = (int16_t) - fpr_rint(t1[u]);
+    }
+    if (PQCLEAN_FALCON1024_AVX2_is_short_half(sqn, s2tmp, logn)) {
+        memcpy(s2, s2tmp, n * sizeof * s2);
+        memcpy(tmp, s1tmp, n * sizeof * s1tmp);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Compute a signature: the signature contains two vectors, s1 and s2.
+ * The s1 vector is not returned. The squared norm of (s1,s2) is
+ * computed, and if it is short enough, then s2 is returned into the
+ * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is
+ * returned; the caller should then try again.
+ *
+ * tmp[] must have room for at least nine polynomials.
+ *
+ * This variant works from the raw private key (f, g, F, G): the basis
+ * is converted to FFT representation twice (before and after sampling)
+ * because the Gram matrix overwrites it in between. On success, the s1
+ * coefficients (as int16_t) are additionally copied to the start of
+ * tmp[].
+ */
+static int
+do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
+            const int8_t *f, const int8_t *g,
+            const int8_t *F, const int8_t *G,
+            const uint16_t *hm, unsigned logn, fpr *tmp) {
+    size_t n, u;
+    fpr *t0, *t1, *tx, *ty;
+    fpr *b00, *b01, *b10, *b11, *g00, *g01, *g11;
+    fpr ni;
+    uint32_t sqn, ng;
+    int16_t *s1tmp, *s2tmp;
+
+    n = MKN(logn);
+
+    /*
+     * Lattice basis is B = [[g, -f], [G, -F]]. We convert it to FFT.
+     */
+    b00 = tmp;
+    b01 = b00 + n;
+    b10 = b01 + n;
+    b11 = b10 + n;
+    smallints_to_fpr(b01, f, logn);
+    smallints_to_fpr(b00, g, logn);
+    smallints_to_fpr(b11, F, logn);
+    smallints_to_fpr(b10, G, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b01, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b00, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b11, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b10, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_neg(b01, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_neg(b11, logn);
+
+    /*
+     * Compute the Gram matrix G = B·B*. Formulas are:
+     * g00 = b00*adj(b00) + b01*adj(b01)
+     * g01 = b00*adj(b10) + b01*adj(b11)
+     * g10 = b10*adj(b00) + b11*adj(b01)
+     * g11 = b10*adj(b10) + b11*adj(b11)
+     *
+     * For historical reasons, this implementation uses
+     * g00, g01 and g11 (upper triangle). g10 is not kept
+     * since it is equal to adj(g01).
+     *
+     * We _replace_ the matrix B with the Gram matrix, but we
+     * must keep b01 and b11 for computing the target vector.
+     */
+    t0 = b11 + n;
+    t1 = t0 + n;
+
+    memcpy(t0, b01, n * sizeof * b01);
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(t0, logn); // t0 <- b01*adj(b01)
+
+    memcpy(t1, b00, n * sizeof * b00);
+    PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(t1, b10, logn); // t1 <- b00*adj(b10)
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(b00, logn); // b00 <- b00*adj(b00)
+    PQCLEAN_FALCON1024_AVX2_poly_add(b00, t0, logn); // b00 <- g00
+    memcpy(t0, b01, n * sizeof * b01);
+    PQCLEAN_FALCON1024_AVX2_poly_muladj_fft(b01, b11, logn); // b01 <- b01*adj(b11)
+    PQCLEAN_FALCON1024_AVX2_poly_add(b01, t1, logn); // b01 <- g01
+
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(b10, logn); // b10 <- b10*adj(b10)
+    memcpy(t1, b11, n * sizeof * b11);
+    PQCLEAN_FALCON1024_AVX2_poly_mulselfadj_fft(t1, logn); // t1 <- b11*adj(b11)
+    PQCLEAN_FALCON1024_AVX2_poly_add(b10, t1, logn); // b10 <- g11
+
+    /*
+     * We rename variables to make things clearer. The three elements
+     * of the Gram matrix use the first 3*n slots of tmp[], followed
+     * by b11 and b01 (in that order).
+     */
+    g00 = b00;
+    g01 = b01;
+    g11 = b10;
+    b01 = t0;
+    t0 = b01 + n;
+    t1 = t0 + n;
+
+    /*
+     * Memory layout at that point:
+     * g00 g01 g11 b11 b01 t0 t1
+     */
+
+    /*
+     * Set the target vector to [hm, 0] (hm is the hashed message).
+     */
+    for (u = 0; u < n; u ++) {
+        t0[u] = fpr_of(hm[u]);
+        /* This is implicit.
+        t1[u] = fpr_zero;
+        */
+    }
+
+    /*
+     * Apply the lattice basis to obtain the real target
+     * vector (after normalization with regards to modulus).
+     */
+    PQCLEAN_FALCON1024_AVX2_FFT(t0, logn);
+    ni = fpr_inverse_of_q;
+    memcpy(t1, t0, n * sizeof * t0);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(t1, b01, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mulconst(t1, fpr_neg(ni), logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(t0, b11, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mulconst(t0, ni, logn);
+
+    /*
+     * b01 and b11 can be discarded, so we move back (t0,t1).
+     * Memory layout is now:
+     * g00 g01 g11 t0 t1
+     */
+    memcpy(b11, t0, n * 2 * sizeof * t0);
+    t0 = g11 + n;
+    t1 = t0 + n;
+
+    /*
+     * Apply sampling; result is written over (t0,t1).
+     */
+    ffSampling_fft_dyntree(samp, samp_ctx,
+                           t0, t1, g00, g01, g11, logn, t1 + n);
+
+    /*
+     * We arrange the layout back to:
+     * b00 b01 b10 b11 t0 t1
+     *
+     * We did not conserve the matrix basis, so we must recompute
+     * it now.
+     * (Source and destination of the move overlap, hence memmove().)
+     */
+    b00 = tmp;
+    b01 = b00 + n;
+    b10 = b01 + n;
+    b11 = b10 + n;
+    memmove(b11 + n, t0, n * 2 * sizeof * t0);
+    t0 = b11 + n;
+    t1 = t0 + n;
+    smallints_to_fpr(b01, f, logn);
+    smallints_to_fpr(b00, g, logn);
+    smallints_to_fpr(b11, F, logn);
+    smallints_to_fpr(b10, G, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b01, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b00, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b11, logn);
+    PQCLEAN_FALCON1024_AVX2_FFT(b10, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_neg(b01, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_neg(b11, logn);
+    tx = t1 + n;
+    ty = tx + n;
+
+    /*
+     * Get the lattice point corresponding to that tiny vector.
+     */
+    memcpy(tx, t0, n * sizeof * t0);
+    memcpy(ty, t1, n * sizeof * t1);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(tx, b00, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(ty, b10, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(tx, ty, logn);
+    memcpy(ty, t0, n * sizeof * t0);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(ty, b01, logn);
+
+    memcpy(t0, tx, n * sizeof * tx);
+    PQCLEAN_FALCON1024_AVX2_poly_mul_fft(t1, b11, logn);
+    PQCLEAN_FALCON1024_AVX2_poly_add(t1, ty, logn);
+    PQCLEAN_FALCON1024_AVX2_iFFT(t0, logn);
+    PQCLEAN_FALCON1024_AVX2_iFFT(t1, logn);
+
+    /*
+     * sqn accumulates the squared norm of s1 = hm - round(t0). ng ORs
+     * together all partial sums: if any of them had its top bit set,
+     * sqn is forced to 0xFFFFFFFF below.
+     */
+    s1tmp = (int16_t *)tx;
+    sqn = 0;
+    ng = 0;
+    for (u = 0; u < n; u ++) {
+        int32_t z;
+
+        z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
+        sqn += (uint32_t)(z * z);
+        ng |= sqn;
+        s1tmp[u] = (int16_t)z;
+    }
+    sqn |= -(ng >> 31);
+
+    /*
+     * With "normal" degrees (e.g. 512 or 1024), it is very
+     * improbable that the computed vector is not short enough;
+     * however, it may happen in practice for the very reduced
+     * versions (e.g. degree 16 or below). In that case, the caller
+     * will loop, and we must not write anything into s2[] because
+     * s2[] may overlap with the hashed message hm[] and we need
+     * hm[] for the next iteration.
+     */
+    s2tmp = (int16_t *)tmp;
+    for (u = 0; u < n; u ++) {
+        s2tmp[u] = (int16_t) - fpr_rint(t1[u]);
+    }
+    if (PQCLEAN_FALCON1024_AVX2_is_short_half(sqn, s2tmp, logn)) {
+        memcpy(s2, s2tmp, n * sizeof * s2);
+        memcpy(tmp, s1tmp, n * sizeof * s1tmp);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Sample an integer value along a half-gaussian distribution centered
+ * on zero and standard deviation 1.8205, with a precision of 72 bits.
+ *
+ * The table below is stored as a 15-bit high part (rhi15) and a 57-bit
+ * low part (rlo57) per entry; the returned value is the number of table
+ * entries that are strictly greater than a 72-bit value drawn from the
+ * PRNG (9 bytes are consumed per call).
+ */
+int
+PQCLEAN_FALCON1024_AVX2_gaussian0_sampler(prng *p) {
+
+    /*
+     * High words.
+     */
+    static const union {
+        uint16_t u16[16];
+        __m256i ymm[1];
+    } rhi15 = {
+        {
+            0x51FB, 0x2A69, 0x113E, 0x0568,
+            0x014A, 0x003B, 0x0008, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000
+        }
+    };
+
+    /*
+     * Low words (57 bits each), in the same order as the high words.
+     */
+    static const union {
+        uint64_t u64[20];
+        __m256i ymm[5];
+    } rlo57 = {
+        {
+            0x1F42ED3AC391802, 0x12B181F3F7DDB82,
+            0x1CDD0934829C1FF, 0x1754377C7994AE4,
+            0x1846CAEF33F1F6F, 0x14AC754ED74BD5F,
+            0x024DD542B776AE4, 0x1A1FFDC65AD63DA,
+            0x01F80D88A7B6428, 0x001C3FDB2040C69,
+            0x00012CF24D031FB, 0x00000949F8B091F,
+            0x0000003665DA998, 0x00000000EBF6EBB,
+            0x0000000002F5D7E, 0x000000000007098,
+            0x0000000000000C6, 0x000000000000001,
+            0x000000000000000, 0x000000000000000
+        }
+    };
+
+    uint64_t lo;
+    unsigned hi;
+    __m256i xhi, rhi, gthi, eqhi, eqm;
+    __m256i xlo, gtlo0, gtlo1, gtlo2, gtlo3, gtlo4;
+    __m128i t, zt;
+    int r;
+
+    /*
+     * Get a 72-bit random value and split it into a low part
+     * (57 bits) and a high part (15 bits)
+     */
+    lo = prng_get_u64(p);
+    hi = prng_get_u8(p);
+    hi = (hi << 7) | (unsigned)(lo >> 57);
+    lo &= 0x1FFFFFFFFFFFFFF;
+
+    /*
+     * Broadcast the high part and compare it with the relevant
+     * values. We need both a "greater than" and an "equal"
+     * comparisons.
+     */
+    xhi = _mm256_broadcastw_epi16(_mm_cvtsi32_si128((int32_t)hi));
+    rhi = _mm256_loadu_si256(&rhi15.ymm[0]);
+    gthi = _mm256_cmpgt_epi16(rhi, xhi);
+    eqhi = _mm256_cmpeq_epi16(rhi, xhi);
+
+    /*
+     * The result is the number of 72-bit values (among the list of 19)
+     * which are greater than the 72-bit random value. We first count
+     * all non-zero 16-bit elements in the first eight of gthi. Such
+     * elements have value -1 or 0, so we first negate them.
+     */
+    t = _mm_srli_epi16(_mm256_castsi256_si128(gthi), 15);
+    zt = _mm_setzero_si128();
+    t = _mm_hadd_epi16(t, zt);
+    t = _mm_hadd_epi16(t, zt);
+    t = _mm_hadd_epi16(t, zt);
+    r = _mm_cvtsi128_si32(t);
+
+    /*
+     * We must look at the low bits for all values for which the
+     * high bits are an "equal" match; values 8-18 all have the
+     * same high bits (0).
+     * On 32-bit systems, 'lo' really is two registers, requiring
+     * some extra code.
+     */
+    xlo = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(*(int64_t *)&lo));
+    gtlo0 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[0]), xlo);
+    gtlo1 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[1]), xlo);
+    gtlo2 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[2]), xlo);
+    gtlo3 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[3]), xlo);
+    gtlo4 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[4]), xlo);
+
+    /*
+     * Keep only comparison results that correspond to the non-zero
+     * elements in eqhi.
+     */
+    gtlo0 = _mm256_and_si256(gtlo0, _mm256_cvtepi16_epi64(
+                                 _mm256_castsi256_si128(eqhi)));
+    gtlo1 = _mm256_and_si256(gtlo1, _mm256_cvtepi16_epi64(
+                                 _mm256_castsi256_si128(_mm256_bsrli_epi128(eqhi, 8))));
+    eqm = _mm256_permute4x64_epi64(eqhi, 0xFF);
+    gtlo2 = _mm256_and_si256(gtlo2, eqm);
+    gtlo3 = _mm256_and_si256(gtlo3, eqm);
+    gtlo4 = _mm256_and_si256(gtlo4, eqm);
+
+    /*
+     * Add all values to count the total number of "-1" elements.
+     * Since the first eight "high" words are all different, only
+     * one element (at most) in gtlo0:gtlo1 can be non-zero; however,
+     * if the high word of the random value is zero, then many
+     * elements of gtlo2:gtlo3:gtlo4 can be non-zero.
+     * (The sum is a negated count, hence the subtraction from r.)
+     */
+    gtlo0 = _mm256_or_si256(gtlo0, gtlo1);
+    gtlo0 = _mm256_add_epi64(
+                _mm256_add_epi64(gtlo0, gtlo2),
+                _mm256_add_epi64(gtlo3, gtlo4));
+    t = _mm_add_epi64(
+            _mm256_castsi256_si128(gtlo0),
+            _mm256_extracti128_si256(gtlo0, 1));
+    t = _mm_add_epi64(t, _mm_srli_si128(t, 8));
+    r -= _mm_cvtsi128_si32(t);
+
+    return r;
+
+}
+
+/*
+ * Sample a bit with probability exp(-x) for some x >= 0.
+ *
+ * 'ccs' is passed through to fpr_expm_p63() together with the reduced
+ * argument. Returns 1 or 0; between one and eight bytes are drawn from
+ * the PRNG.
+ */
+static int
+BerExp(prng *p, fpr x, fpr ccs) {
+    int s, i;
+    fpr r;
+    uint32_t sw, w;
+    uint64_t z;
+
+    /*
+     * Reduce x modulo log(2): x = s*log(2) + r, with s an integer,
+     * and 0 <= r < log(2). Since x >= 0, we can use fpr_trunc().
+     */
+    s = (int)fpr_trunc(fpr_mul(x, fpr_inv_log2));
+    r = fpr_sub(x, fpr_mul(fpr_of(s), fpr_log2));
+
+    /*
+     * It may happen (quite rarely) that s >= 64; if sigma = 1.2
+     * (the minimum value for sigma), r = 0 and b = 1, then we get
+     * s >= 64 if the half-Gaussian produced a z >= 13, which happens
+     * with probability about 0.000000000230383991, which is
+     * approximately equal to 2^(-32). In any case, if s >= 64,
+     * then BerExp will be non-zero with probability less than
+     * 2^(-64), so we can simply saturate s at 63.
+     * (The saturation is done with a mask rather than a branch.)
+     */
+    sw = (uint32_t)s;
+    sw ^= (sw ^ 63) & -((63 - sw) >> 31);
+    s = (int)sw;
+
+    /*
+     * Compute exp(-r); we know that 0 <= r < log(2) at this point, so
+     * we can use fpr_expm_p63(), which yields a result scaled to 2^63.
+     * We scale it up to 2^64, then right-shift it by s bits because
+     * we really want exp(-x) = 2^(-s)*exp(-r).
+     *
+     * The "-1" operation makes sure that the value fits on 64 bits
+     * (i.e. if r = 0, we may get 2^64, and we prefer 2^64-1 in that
+     * case). The bias is negligible since fpr_expm_p63() only computes
+     * with 51 bits of precision or so.
+     */
+    z = ((fpr_expm_p63(r, ccs) << 1) - 1) >> s;
+
+    /*
+     * Sample a bit with probability exp(-x). Since x = s*log(2) + r,
+     * exp(-x) = 2^-s * exp(-r), we compare lazily exp(-x) with the
+     * PRNG output to limit its consumption, the sign of the difference
+     * yields the expected result.
+     * (One random byte is compared against one byte of z at a time,
+     * most significant byte first, until the two differ.)
+     */
+    i = 64;
+    do {
+        i -= 8;
+        w = prng_get_u8(p) - ((uint32_t)(z >> i) & 0xFF);
+    } while (!w && i > 0);
+    return (int)(w >> 31);
+}
+
+/*
+ * The sampler produces a random integer that follows a discrete Gaussian
+ * distribution, centered on mu, and with standard deviation sigma. The
+ * provided parameter isigma is equal to 1/sigma.
+ *
+ * The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between
+ * 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9.
+ */
+int
+PQCLEAN_FALCON1024_AVX2_sampler(void *ctx, fpr mu, fpr isigma) {
+    sampler_context *spc;
+    int s, z0, z, b;
+    fpr r, dss, ccs, x;
+
+    spc = ctx;
+
+    /*
+     * Center is mu. We compute mu = s + r where s is an integer
+     * and 0 <= r < 1.
+     */
+    s = (int)fpr_floor(mu);
+    r = fpr_sub(mu, fpr_of(s));
+
+    /*
+     * dss = 1/(2*sigma^2) = 0.5*(isigma^2).
+     */
+    dss = fpr_half(fpr_sqr(isigma));
+
+    /*
+     * ccs = sigma_min / sigma = sigma_min * isigma.
+     */
+    ccs = fpr_mul(isigma, spc->sigma_min);
+
+    /*
+     * We now need to sample on center r.
+     */
+    for (;;) {
+        /*
+         * Sample z for a Gaussian distribution. Then get a
+         * random bit b to turn the sampling into a bimodal
+         * distribution: if b = 1, we use z+1, otherwise we
+         * use -z. We thus have two situations:
+         *
+         * - b = 1: z >= 1 and sampled against a Gaussian
+         * centered on 1.
+         * - b = 0: z <= 0 and sampled against a Gaussian
+         * centered on 0.
+         */
+        z0 = PQCLEAN_FALCON1024_AVX2_gaussian0_sampler(&spc->p);
+        b = (int)prng_get_u8(&spc->p) & 1;
+        z = b + ((b << 1) - 1) * z0;
+
+        /*
+         * Rejection sampling. We want a Gaussian centered on r;
+         * but we sampled against a Gaussian centered on b (0 or
+         * 1).
But we know that z is always in the range where + * our sampling distribution is greater than the Gaussian + * distribution, so rejection works. + * + * We got z with distribution: + * G(z) = exp(-((z-b)^2)/(2*sigma0^2)) + * We target distribution: + * S(z) = exp(-((z-r)^2)/(2*sigma^2)) + * Rejection sampling works by keeping the value z with + * probability S(z)/G(z), and starting again otherwise. + * This requires S(z) <= G(z), which is the case here. + * Thus, we simply need to keep our z with probability: + * P = exp(-x) + * where: + * x = ((z-r)^2)/(2*sigma^2) - ((z-b)^2)/(2*sigma0^2) + * + * Here, we scale up the Bernouilli distribution, which + * makes rejection more probable, but makes rejection + * rate sufficiently decorrelated from the Gaussian + * center and standard deviation that the whole sampler + * can be said to be constant-time. + */ + x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z), r)), dss); + x = fpr_sub(x, fpr_mul(fpr_of(z0 * z0), fpr_inv_2sqrsigma0)); + if (BerExp(&spc->p, x, ccs)) { + /* + * Rejection sampling was centered on r, but the + * actual center is mu = s + r. + */ + return s + z; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp) { + fpr *ftmp; + + ftmp = (fpr *)tmp; + for (;;) { + /* + * Signature produces short vectors s1 and s2. The + * signature is acceptable only if the aggregate vector + * s1,s2 is short; we must use the same bound as the + * verifier. + * + * If the signature is acceptable, then we return only s2 + * (the verifier recomputes s1 from s2, the hashed message, + * and the public key). + */ + sampler_context spc; + samplerZ samp; + void *samp_ctx; + + /* + * Normal sampling. We use a fast PRNG seeded from our + * SHAKE context ('rng'). 
+ */ + if (logn == 10) { + spc.sigma_min = fpr_sigma_min_10; + } else { + spc.sigma_min = fpr_sigma_min_9; + } + PQCLEAN_FALCON1024_AVX2_prng_init(&spc.p, rng); + samp = PQCLEAN_FALCON1024_AVX2_sampler; + samp_ctx = &spc; + + /* + * Do the actual signature. + */ + if (do_sign_tree(samp, samp_ctx, sig, + expanded_key, hm, logn, ftmp)) { + break; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp) { + fpr *ftmp; + + ftmp = (fpr *)tmp; + for (;;) { + /* + * Signature produces short vectors s1 and s2. The + * signature is acceptable only if the aggregate vector + * s1,s2 is short; we must use the same bound as the + * verifier. + * + * If the signature is acceptable, then we return only s2 + * (the verifier recomputes s1 from s2, the hashed message, + * and the public key). + */ + sampler_context spc; + samplerZ samp; + void *samp_ctx; + + /* + * Normal sampling. We use a fast PRNG seeded from our + * SHAKE context ('rng'). + */ + if (logn == 10) { + spc.sigma_min = fpr_sigma_min_10; + } else { + spc.sigma_min = fpr_sigma_min_9; + } + PQCLEAN_FALCON1024_AVX2_prng_init(&spc.p, rng); + samp = PQCLEAN_FALCON1024_AVX2_sampler; + samp_ctx = &spc; + + /* + * Do the actual signature. + */ + if (do_sign_dyn(samp, samp_ctx, sig, + f, g, F, G, hm, logn, ftmp)) { + break; + } + } +} diff --git a/crypto_sign/falcon/falcon-1024/avx2/vrfy.c b/crypto_sign/falcon/falcon-1024/avx2/vrfy.c new file mode 100644 index 00000000..cf3e8e65 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/avx2/vrfy.c @@ -0,0 +1,853 @@ +#include "inner.h" + +/* + * Falcon signature verification. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ===================================================================== */ +/* + * Constants for NTT. + * + * n = 2^logn (2 <= n <= 1024) + * phi = X^n + 1 + * q = 12289 + * q0i = -1/q mod 2^16 + * R = 2^16 mod q + * R2 = 2^32 mod q + */ + +#define Q 12289 +#define Q0I 12287 +#define R 4091 +#define R2 10952 + +/* + * Table for NTT, binary case: + * GMb[x] = R*(g^rev(x)) mod q + * where g = 7 (it is a 2048-th primitive root of 1 modulo q) + * and rev() is the bit-reversal function over 10 bits. 
+ */ +static const uint16_t GMb[] = { + 4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, + 1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, + 1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, + 10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, + 12210, 6240, 997, 117, 4783, 4407, 1549, 7072, + 2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, + 12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, + 2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, + 5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, + 1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, + 3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, + 9277, 6130, 3323, 883, 10469, 489, 1502, 2851, + 11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, + 730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, + 9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, + 7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, + 3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, + 10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, + 6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, + 8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, + 6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, + 8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, + 5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, + 1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, + 737, 3698, 4699, 5753, 9046, 3687, 16, 914, + 5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, + 10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, + 6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, + 932, 10229, 8927, 7642, 351, 9298, 237, 5858, + 7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, + 4602, 1748, 11300, 340, 3711, 4614, 300, 10993, + 5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, + 3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, + 4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, + 6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, + 6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, + 8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, + 3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, + 7389, 8879, 10661, 11457, 4220, 
1432, 10832, 4328, + 8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, + 2523, 4339, 6115, 619, 937, 2834, 7775, 3279, + 2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, + 2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, + 12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, + 1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, + 11192, 315, 4511, 1158, 6061, 6751, 11865, 357, + 7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, + 4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, + 3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, + 5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, + 4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, + 7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, + 8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, + 6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, + 7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, + 10438, 9471, 1271, 408, 6911, 3079, 360, 8276, + 11535, 9156, 9049, 11539, 850, 8617, 784, 7919, + 8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, + 9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, + 6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, + 8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, + 8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, + 4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, + 2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, + 4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, + 11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, + 7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, + 11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, + 11736, 6813, 6979, 819, 8903, 6271, 10843, 348, + 7514, 8339, 6439, 694, 852, 5659, 2781, 3716, + 11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, + 2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, + 2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, + 10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, + 10923, 4918, 128, 7312, 725, 9157, 5006, 6393, + 3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, + 3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, + 5110, 45, 2400, 1921, 4377, 2720, 1695, 51, + 
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, + 4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, + 482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, + 11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, + 11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, + 8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, + 11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, + 862, 3158, 477, 7279, 5678, 7914, 4254, 302, + 2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, + 10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, + 5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, + 11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, + 1397, 10678, 103, 7420, 7976, 936, 764, 632, + 7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, + 6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, + 4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, + 7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, + 10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, + 2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, + 4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, + 8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, + 8192, 986, 7527, 1401, 870, 3615, 8465, 2756, + 9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, + 11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, + 2567, 708, 893, 6465, 4962, 10024, 2090, 5718, + 10743, 780, 4733, 4623, 2134, 2087, 4802, 884, + 5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, + 4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, + 6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, + 11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, + 8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, + 4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, + 2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, + 5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, + 11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, + 3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, + 7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, + 6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, + 10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, + 5371, 7274, 
11084, 290, 7864, 6827, 11822, 2509, + 6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, + 11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, + 7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, + 9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, + 7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, + 10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, + 162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, + 8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, + 5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, + 6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 +}; + +/* + * Table for inverse NTT, binary case: + * iGMb[x] = R*((1/g)^rev(x)) mod q + * Since g = 7, 1/g = 8778 mod 12289. + */ +static const uint16_t iGMb[] = { + 4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, + 2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, + 3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, + 5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, + 8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, + 4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, + 8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, + 5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, + 13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, + 9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, + 7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, + 11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, + 9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, + 6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, + 5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, + 4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, + 6635, 6543, 1582, 4868, 42, 673, 2240, 7219, + 1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, + 7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, + 6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, + 12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, + 6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, + 6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, + 11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, + 6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, + 9130, 9948, 
3566, 2133, 3901, 6038, 7333, 6609, + 3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, + 3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, + 5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, + 4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, + 10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, + 3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, + 5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, + 2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, + 3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, + 7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, + 10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, + 7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, + 6689, 386, 4462, 105, 2076, 10443, 119, 3955, + 4370, 11505, 3672, 11439, 750, 3240, 3133, 754, + 4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, + 4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, + 10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, + 728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, + 945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, + 4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, + 1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, + 101, 1911, 9483, 3608, 11997, 10536, 812, 8915, + 637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, + 3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, + 11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, + 2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, + 7769, 136, 617, 3157, 5889, 9219, 6855, 120, + 4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, + 11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, + 9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, + 6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, + 7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, + 10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, + 9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, + 4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, + 3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, + 3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, + 1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, + 2340, 7651, 1910, 10709, 2117, 9637, 
6402, 6028, + 2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, + 9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, + 6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, + 2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, + 5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, + 9956, 2702, 6656, 735, 2243, 11656, 833, 3107, + 6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, + 3513, 9769, 3025, 779, 9433, 3392, 7437, 668, + 10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, + 9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, + 5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, + 6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, + 7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, + 9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, + 707, 1088, 4936, 678, 10245, 18, 5684, 960, + 4459, 7957, 226, 2451, 6, 8874, 320, 6298, + 8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, + 9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, + 7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, + 5227, 952, 4319, 9810, 4356, 3088, 11118, 840, + 6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, + 4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, + 1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, + 11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, + 6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, + 2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, + 7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, + 9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, + 2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, + 12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, + 7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, + 9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, + 9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, + 439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, + 8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, + 10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, + 11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, + 11489, 8833, 2393, 15, 10830, 5003, 17, 565, + 5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, + 5840, 10482, 
8338, 6035, 6964, 1574, 10936, 2020, + 2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, + 11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, + 104, 6348, 9643, 6757, 12110, 5617, 10935, 541, + 135, 3041, 7200, 6526, 5085, 12136, 842, 4129, + 7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, + 1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, + 1770, 273, 8377, 2271, 5225, 10283, 116, 11807, + 91, 11699, 757, 1304, 7524, 6451, 8032, 8154, + 7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, + 12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, + 3924, 3188, 367, 2077, 336, 5384, 5631, 8596, + 4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, + 5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, + 12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, + 10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, + 9763, 12191, 459, 2966, 3166, 405, 5000, 9311, + 6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, + 8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, + 11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, + 9474, 2586, 1431, 2741, 473, 11383, 4745, 836, + 4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, + 5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, + 5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 +}; + +/* + * Reduce a small signed integer modulo q. The source integer MUST + * be between -q/2 and +q/2. + */ +static inline uint32_t +mq_conv_small(int x) { + /* + * If x < 0, the cast to uint32_t will set the high bit to 1. + */ + uint32_t y; + + y = (uint32_t)x; + y += Q & -(y >> 31); + return y; +} + +/* + * Addition modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_add(uint32_t x, uint32_t y) { + /* + * We compute x + y - q. If the result is negative, then the + * high bit will be set, and 'd >> 31' will be equal to 1; + * thus '-(d >> 31)' will be an all-one pattern. Otherwise, + * it will be an all-zero pattern. In other words, this + * implements a conditional addition of q. 
+ */ + uint32_t d; + + d = x + y - Q; + d += Q & -(d >> 31); + return d; +} + +/* + * Subtraction modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_sub(uint32_t x, uint32_t y) { + /* + * As in mq_add(), we use a conditional addition to ensure the + * result is in the 0..q-1 range. + */ + uint32_t d; + + d = x - y; + d += Q & -(d >> 31); + return d; +} + +/* + * Division by 2 modulo q. Operand must be in the 0..q-1 range. + */ +static inline uint32_t +mq_rshift1(uint32_t x) { + x += Q & -(x & 1); + return (x >> 1); +} + +/* + * Montgomery multiplication modulo q. If we set R = 2^16 mod q, then + * this function computes: x * y / R mod q + * Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_montymul(uint32_t x, uint32_t y) { + uint32_t z, w; + + /* + * We compute x*y + k*q with a value of k chosen so that the 16 + * low bits of the result are 0. We can then shift the value. + * After the shift, result may still be larger than q, but it + * will be lower than 2*q, so a conditional subtraction works. + */ + + z = x * y; + w = ((z * Q0I) & 0xFFFF) * Q; + + /* + * When adding z and w, the result will have its low 16 bits + * equal to 0. Since x, y and z are lower than q, the sum will + * be no more than (2^15 - 1) * q + (q - 1)^2, which will + * fit on 29 bits. + */ + z = (z + w) >> 16; + + /* + * After the shift, analysis shows that the value will be less + * than 2q. We do a subtraction then conditional subtraction to + * ensure the result is in the expected range. + */ + z -= Q; + z += Q & -(z >> 31); + return z; +} + +/* + * Montgomery squaring (computes (x^2)/R). + */ +static inline uint32_t +mq_montysqr(uint32_t x) { + return mq_montymul(x, x); +} + +/* + * Divide x by y modulo q = 12289. + */ +static inline uint32_t +mq_div_12289(uint32_t x, uint32_t y) { + /* + * We invert y by computing y^(q-2) mod q. 
+ * + * We use the following addition chain for exponent e = 12287: + * + * e0 = 1 + * e1 = 2 * e0 = 2 + * e2 = e1 + e0 = 3 + * e3 = e2 + e1 = 5 + * e4 = 2 * e3 = 10 + * e5 = 2 * e4 = 20 + * e6 = 2 * e5 = 40 + * e7 = 2 * e6 = 80 + * e8 = 2 * e7 = 160 + * e9 = e8 + e2 = 163 + * e10 = e9 + e8 = 323 + * e11 = 2 * e10 = 646 + * e12 = 2 * e11 = 1292 + * e13 = e12 + e9 = 1455 + * e14 = 2 * e13 = 2910 + * e15 = 2 * e14 = 5820 + * e16 = e15 + e10 = 6143 + * e17 = 2 * e16 = 12286 + * e18 = e17 + e0 = 12287 + * + * Additions on exponents are converted to Montgomery + * multiplications. We define all intermediate results as so + * many local variables, and let the C compiler work out which + * must be kept around. + */ + uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; + uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; + + y0 = mq_montymul(y, R2); + y1 = mq_montysqr(y0); + y2 = mq_montymul(y1, y0); + y3 = mq_montymul(y2, y1); + y4 = mq_montysqr(y3); + y5 = mq_montysqr(y4); + y6 = mq_montysqr(y5); + y7 = mq_montysqr(y6); + y8 = mq_montysqr(y7); + y9 = mq_montymul(y8, y2); + y10 = mq_montymul(y9, y8); + y11 = mq_montysqr(y10); + y12 = mq_montysqr(y11); + y13 = mq_montymul(y12, y9); + y14 = mq_montysqr(y13); + y15 = mq_montysqr(y14); + y16 = mq_montymul(y15, y10); + y17 = mq_montysqr(y16); + y18 = mq_montymul(y17, y0); + + /* + * Final multiplication with x, which is not in Montgomery + * representation, computes the correct division result. + */ + return mq_montymul(y18, x); +} + +/* + * Compute NTT on a ring element. 
+ */ +static void +mq_NTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, i, j1; + + ht = t >> 1; + for (i = 0, j1 = 0; i < m; i ++, j1 += t) { + size_t j, j2; + uint32_t s; + + s = GMb[m + i]; + j2 = j1 + ht; + for (j = j1; j < j2; j ++) { + uint32_t u, v; + + u = a[j]; + v = mq_montymul(a[j + ht], s); + a[j] = (uint16_t)mq_add(u, v); + a[j + ht] = (uint16_t)mq_sub(u, v); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT on a ring element, binary case. + */ +static void +mq_iNTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + uint32_t ni; + + n = (size_t)1 << logn; + t = 1; + m = n; + while (m > 1) { + size_t hm, dt, i, j1; + + hm = m >> 1; + dt = t << 1; + for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { + size_t j, j2; + uint32_t s; + + j2 = j1 + t; + s = iGMb[hm + i]; + for (j = j1; j < j2; j ++) { + uint32_t u, v, w; + + u = a[j]; + v = a[j + t]; + a[j] = (uint16_t)mq_add(u, v); + w = mq_sub(u, v); + a[j + t] = (uint16_t) + mq_montymul(w, s); + } + } + t = dt; + m = hm; + } + + /* + * To complete the inverse NTT, we must now divide all values by + * n (the vector size). We thus need the inverse of n, i.e. we + * need to divide 1 by 2 logn times. But we also want it in + * Montgomery representation, i.e. we also want to multiply it + * by R = 2^16. In the common case, this should be a simple right + * shift. The loop below is generic and works also in corner cases; + * its computation time is negligible. + */ + ni = R; + for (m = n; m > 1; m >>= 1) { + ni = mq_rshift1(ni); + } + for (m = 0; m < n; m ++) { + a[m] = (uint16_t)mq_montymul(a[m], ni); + } +} + +/* + * Convert a polynomial (mod q) to Montgomery representation. 
+ */ +static void +mq_poly_tomonty(uint16_t *f, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], R2); + } +} + +/* + * Multiply two polynomials together (NTT representation, and using + * a Montgomery multiplication). Result f*g is written over f. + */ +static void +mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], g[u]); + } +} + +/* + * Subtract polynomial g from polynomial f. + */ +static void +mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_sub(f[u], g[u]); + } +} + +/* ===================================================================== */ + +/* see inner.h */ +void +PQCLEAN_FALCON1024_AVX2_to_ntt_monty(uint16_t *h, unsigned logn) { + mq_NTT(h, logn); + mq_poly_tomonty(h, logn); +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + + /* + * Reduce s2 elements modulo q ([0..q-1] range). + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + + /* + * Compute -s1 = s2*h - c0 mod phi mod q (in tt[]). + */ + mq_NTT(tt, logn); + mq_poly_montymul_ntt(tt, h, logn); + mq_iNTT(tt, logn); + mq_poly_sub(tt, c0, logn); + + /* + * Normalize -s1 elements into the [-q/2..q/2] range. + */ + for (u = 0; u < n; u ++) { + int32_t w; + + w = (int32_t)tt[u]; + w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); + ((int16_t *)tt)[u] = (int16_t)w; + } + + /* + * Signature is valid if and only if the aggregate (-s1,s2) vector + * is short enough. 
+ */ + return PQCLEAN_FALCON1024_AVX2_is_short((int16_t *)tt, s2, logn); +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + tt[u] = (uint16_t)mq_conv_small(f[u]); + h[u] = (uint16_t)mq_conv_small(g[u]); + } + mq_NTT(h, logn); + mq_NTT(tt, logn); + for (u = 0; u < n; u ++) { + if (tt[u] == 0) { + return 0; + } + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + return 1; +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *t1, *t2; + + n = (size_t)1 << logn; + t1 = (uint16_t *)tmp; + t2 = t1 + n; + for (u = 0; u < n; u ++) { + t1[u] = (uint16_t)mq_conv_small(g[u]); + t2[u] = (uint16_t)mq_conv_small(F[u]); + } + mq_NTT(t1, logn); + mq_NTT(t2, logn); + mq_poly_tomonty(t1, logn); + mq_poly_montymul_ntt(t1, t2, logn); + for (u = 0; u < n; u ++) { + t2[u] = (uint16_t)mq_conv_small(f[u]); + } + mq_NTT(t2, logn); + for (u = 0; u < n; u ++) { + if (t2[u] == 0) { + return 0; + } + t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); + } + mq_iNTT(t1, logn); + for (u = 0; u < n; u ++) { + uint32_t w; + int32_t gi; + + w = t1[u]; + w -= (Q & ~ -((w - (Q >> 1)) >> 31)); + gi = *(int32_t *)&w; + if (gi < -127 || gi > +127) { + return 0; + } + G[u] = (int8_t)gi; + } + return 1; +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + mq_NTT(tt, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + } + return (int)(1u - (r 
>> 31)); +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + + /* + * Reduce elements of s1 and s2 modulo q; then write s2 into tt[] + * and c0 - s1 into h[]. + */ + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + + w = (uint32_t)s1[u]; + w += Q & -(w >> 31); + w = mq_sub(c0[u], w); + h[u] = (uint16_t)w; + } + + /* + * Compute h = (c0 - s1) / s2. If one of the coefficients of s2 + * is zero (in NTT representation) then the operation fails. We + * keep that information into a flag so that we do not deviate + * from strict constant-time processing; if all coefficients of + * s2 are non-zero, then the high bit of r will be zero. + */ + mq_NTT(tt, logn); + mq_NTT(h, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + + /* + * Signature is acceptable if and only if it is short enough, + * and s2 was invertible mod phi mod q. The caller must still + * check that the rebuilt public key matches the expected + * value (e.g. through a hash). 
+ */ + r = ~r & (uint32_t) - PQCLEAN_FALCON1024_AVX2_is_short(s1, s2, logn); + return (int)(r >> 31); +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) { + uint16_t *s2; + size_t u, n; + uint32_t r; + + n = (size_t)1 << logn; + s2 = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)sig[u]; + w += Q & -(w >> 31); + s2[u] = (uint16_t)w; + } + mq_NTT(s2, logn); + r = 0; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u] - 1u; + r += (w >> 31); + } + return (int)r; +} diff --git a/crypto_sign/falcon/falcon-1024/clean/LICENSE b/crypto_sign/falcon/falcon-1024/clean/LICENSE new file mode 100644 index 00000000..12c7b56c --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/LICENSE @@ -0,0 +1,24 @@ +\ +MIT License + +Copyright (c) 2017-2019 Falcon Project + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/crypto_sign/falcon/falcon-1024/clean/Makefile.Microsoft_nmake b/crypto_sign/falcon/falcon-1024/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..15a74498 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/Makefile.Microsoft_nmake @@ -0,0 +1,23 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libfalcon-1024_clean.lib +OBJECTS=codec.obj common.obj fft.obj fpr.obj keygen.obj pqclean.obj rng.obj sign.obj vrfy.obj + +# Warning C4146 is raised when a unary minus operator is applied to an +# unsigned type; this has nonetheless been standard and portable for as +# long as there has been a C standard, and we do that a lot, especially +# for constant-time computations. Thus, we disable that spurious warning. +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /wd4146 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/falcon/falcon-1024/clean/api.h b/crypto_sign/falcon/falcon-1024/clean/api.h new file mode 100644 index 00000000..7a1c6569 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/api.h @@ -0,0 +1,80 @@ +#ifndef PQCLEAN_FALCON1024_CLEAN_API_H +#define PQCLEAN_FALCON1024_CLEAN_API_H + +#include +#include + +#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES 2305 +#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES 1793 +#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES 1330 + +#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME "Falcon-1024" + +/* + * Generate a new key pair. Public key goes into pk[], private key in sk[]. + * Key sizes are exact (in bytes): + * public (pk): PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES + * private (sk): PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES + * + * Return value: 0 on success, -1 on error. 
+ */ +int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/* + * Compute a signature on a provided message (m, mlen), with a given + * private key (sk). Signature is written in sig[], with length written + * into *siglen. Signature length is variable; maximum signature length + * (in bytes) is PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES. + * + * sig[], m[] and sk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Verify a signature (sig, siglen) on a message (m, mlen) with a given + * public key (pk). + * + * sig[], m[] and pk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/* + * Compute a signature on a message and pack the signature and message + * into a single object, written into sm[]. The length of that output is + * written in *smlen; that length may be larger than the message length + * (mlen) by up to PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES. + * + * sm[] and m[] may overlap each other arbitrarily; however, sm[] shall + * not overlap with sk[]. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Open a signed message object (sm, smlen) and verify the signature; + * on success, the message itself is written into m[] and its length + * into *mlen. The message is shorter than the signed message object, + * but the size difference depends on the signature value; the difference + * may range up to PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES. + * + * m[], sm[] and pk[] may overlap each other arbitrarily. 
+ * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON1024_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/falcon/falcon-1024/clean/codec.c b/crypto_sign/falcon/falcon-1024/clean/codec.c new file mode 100644 index 00000000..c5ab4938 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/codec.c @@ -0,0 +1,555 @@ +#include "inner.h" + +/* + * Encoding/decoding of keys and signatures. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_modq_encode( + void *out, size_t max_out_len, + const uint16_t *x, unsigned logn) { + size_t n, out_len, u; + uint8_t *buf; + uint32_t acc; + int acc_len; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + if (x[u] >= 12289) { + return 0; + } + } + out_len = ((n * 14) + 7) >> 3; + if (out == NULL) { + return out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + for (u = 0; u < n; u ++) { + acc = (acc << 14) | x[u]; + acc_len += 14; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_modq_decode( + uint16_t *x, unsigned logn, + const void *in, size_t max_in_len) { + size_t n, in_len, u; + const uint8_t *buf; + uint32_t acc; + int acc_len; + + n = (size_t)1 << logn; + in_len = ((n * 14) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + acc = 0; + acc_len = 0; + u = 0; + while (u < n) { + acc = (acc << 8) | (*buf ++); + acc_len += 8; + if (acc_len >= 14) { + unsigned w; + + acc_len -= 14; + w = (acc >> acc_len) & 0x3FFF; + if (w >= 12289) { + return 0; + } + x[u ++] = (uint16_t)w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_trim_i16_encode( + void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits) { + size_t n, u, out_len; + int minv, maxv; + uint8_t *buf; + uint32_t acc, mask; + unsigned acc_len; + + n = (size_t)1 << logn; + maxv = (1 << (bits - 1)) - 1; + minv = -maxv; + for (u = 0; u < n; u ++) { + if (x[u] < minv || x[u] > maxv) { + return 0; + } + } + out_len = ((n * bits) + 7) >> 3; + if (out == NULL) { + return 
out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + mask = ((uint32_t)1 << bits) - 1; + for (u = 0; u < n; u ++) { + acc = (acc << bits) | ((uint16_t)x[u] & mask); + acc_len += bits; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf ++ = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_trim_i16_decode( + int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len) { + size_t n, in_len; + const uint8_t *buf; + size_t u; + uint32_t acc, mask1, mask2; + unsigned acc_len; + + n = (size_t)1 << logn; + in_len = ((n * bits) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + u = 0; + acc = 0; + acc_len = 0; + mask1 = ((uint32_t)1 << bits) - 1; + mask2 = (uint32_t)1 << (bits - 1); + while (u < n) { + acc = (acc << 8) | *buf ++; + acc_len += 8; + while (acc_len >= bits && u < n) { + uint32_t w; + + acc_len -= bits; + w = (acc >> acc_len) & mask1; + w |= -(w & mask2); + if (w == -mask2) { + /* + * The -2^(bits-1) value is forbidden. + */ + return 0; + } + w |= -(w & mask2); + x[u ++] = (int16_t) * (int32_t *)&w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + /* + * Extra bits in the last byte must be zero. 
+ */ + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( + void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits) { + size_t n, u, out_len; + int minv, maxv; + uint8_t *buf; + uint32_t acc, mask; + unsigned acc_len; + + n = (size_t)1 << logn; + maxv = (1 << (bits - 1)) - 1; + minv = -maxv; + for (u = 0; u < n; u ++) { + if (x[u] < minv || x[u] > maxv) { + return 0; + } + } + out_len = ((n * bits) + 7) >> 3; + if (out == NULL) { + return out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + mask = ((uint32_t)1 << bits) - 1; + for (u = 0; u < n; u ++) { + acc = (acc << bits) | ((uint8_t)x[u] & mask); + acc_len += bits; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf ++ = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( + int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len) { + size_t n, in_len; + const uint8_t *buf; + size_t u; + uint32_t acc, mask1, mask2; + unsigned acc_len; + + n = (size_t)1 << logn; + in_len = ((n * bits) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + u = 0; + acc = 0; + acc_len = 0; + mask1 = ((uint32_t)1 << bits) - 1; + mask2 = (uint32_t)1 << (bits - 1); + while (u < n) { + acc = (acc << 8) | *buf ++; + acc_len += 8; + while (acc_len >= bits && u < n) { + uint32_t w; + + acc_len -= bits; + w = (acc >> acc_len) & mask1; + w |= -(w & mask2); + if (w == -mask2) { + /* + * The -2^(bits-1) value is forbidden. + */ + return 0; + } + x[u ++] = (int8_t) * (int32_t *)&w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + /* + * Extra bits in the last byte must be zero. 
+ */ + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_comp_encode( + void *out, size_t max_out_len, + const int16_t *x, unsigned logn) { + uint8_t *buf; + size_t n, u, v; + uint32_t acc; + unsigned acc_len; + + n = (size_t)1 << logn; + buf = out; + + /* + * Make sure that all values are within the -2047..+2047 range. + */ + for (u = 0; u < n; u ++) { + if (x[u] < -2047 || x[u] > +2047) { + return 0; + } + } + + acc = 0; + acc_len = 0; + v = 0; + for (u = 0; u < n; u ++) { + int t; + unsigned w; + + /* + * Get sign and absolute value of next integer; push the + * sign bit. + */ + acc <<= 1; + t = x[u]; + if (t < 0) { + t = -t; + acc |= 1; + } + w = (unsigned)t; + + /* + * Push the low 7 bits of the absolute value. + */ + acc <<= 7; + acc |= w & 127u; + w >>= 7; + + /* + * We pushed exactly 8 bits. + */ + acc_len += 8; + + /* + * Push as many zeros as necessary, then a one. Since the + * absolute value is at most 2047, w can only range up to + * 15 at this point, thus we will add at most 16 bits + * here. With the 8 bits above and possibly up to 7 bits + * from previous iterations, we may go up to 31 bits, which + * will fit in the accumulator, which is an uint32_t. + */ + acc <<= (w + 1); + acc |= 1; + acc_len += w + 1; + + /* + * Produce all full bytes. + */ + while (acc_len >= 8) { + acc_len -= 8; + if (buf != NULL) { + if (v >= max_out_len) { + return 0; + } + buf[v] = (uint8_t)(acc >> acc_len); + } + v ++; + } + } + + /* + * Flush remaining bits (if any). 
+ */ + if (acc_len > 0) { + if (buf != NULL) { + if (v >= max_out_len) { + return 0; + } + buf[v] = (uint8_t)(acc << (8 - acc_len)); + } + v ++; + } + + return v; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON1024_CLEAN_comp_decode( + int16_t *x, unsigned logn, + const void *in, size_t max_in_len) { + const uint8_t *buf; + size_t n, u, v; + uint32_t acc; + unsigned acc_len; + + n = (size_t)1 << logn; + buf = in; + acc = 0; + acc_len = 0; + v = 0; + for (u = 0; u < n; u ++) { + unsigned b, s, m; + + /* + * Get next eight bits: sign and low seven bits of the + * absolute value. + */ + if (v >= max_in_len) { + return 0; + } + acc = (acc << 8) | (uint32_t)buf[v ++]; + b = acc >> acc_len; + s = b & 128; + m = b & 127; + + /* + * Get next bits until a 1 is reached. + */ + for (;;) { + if (acc_len == 0) { + if (v >= max_in_len) { + return 0; + } + acc = (acc << 8) | (uint32_t)buf[v ++]; + acc_len = 8; + } + acc_len --; + if (((acc >> acc_len) & 1) != 0) { + break; + } + m += 128; + if (m > 2047) { + return 0; + } + } + x[u] = (int16_t) m; + if (s) { + x[u] = (int16_t) - x[u]; + } + } + return v; +} + +/* + * Key elements and signatures are polynomials with small integer + * coefficients. Here are some statistics gathered over many + * generated key pairs (10000 or more for each degree): + * + * log(n) n max(f,g) std(f,g) max(F,G) std(F,G) + * 1 2 129 56.31 143 60.02 + * 2 4 123 40.93 160 46.52 + * 3 8 97 28.97 159 38.01 + * 4 16 100 21.48 154 32.50 + * 5 32 71 15.41 151 29.36 + * 6 64 59 11.07 138 27.77 + * 7 128 39 7.91 144 27.00 + * 8 256 32 5.63 148 26.61 + * 9 512 22 4.00 137 26.46 + * 10 1024 15 2.84 146 26.41 + * + * We want a compact storage format for private key, and, as part of + * key generation, we are allowed to reject some keys which would + * otherwise be fine (this does not induce any noticeable vulnerability + * as long as we reject only a small proportion of possible keys). 
+ * Hence, we enforce at key generation time maximum values for the + * elements of f, g, F and G, so that their encoding can be expressed + * in fixed-width values. Limits have been chosen so that generated + * keys are almost always within bounds, thus not impacting neither + * security or performance. + * + * IMPORTANT: the code assumes that all coefficients of f, g, F and G + * ultimately fit in the -127..+127 range. Thus, none of the elements + * of max_fg_bits[] and max_FG_bits[] shall be greater than 8. + */ + +const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 7, + 7, + 6, + 6, + 5 +}; + +const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8 +}; + +/* + * When generating a new key pair, we can always reject keys which + * feature an abnormally large coefficient. This can also be done for + * signatures, albeit with some care: in case the signature process is + * used in a derandomized setup (explicitly seeded with the message and + * private key), we have to follow the specification faithfully, and the + * specification only enforces a limit on the L2 norm of the signature + * vector. The limit on the L2 norm implies that the absolute value of + * a coefficient of the signature cannot be more than the following: + * + * log(n) n max sig coeff (theoretical) + * 1 2 412 + * 2 4 583 + * 3 8 824 + * 4 16 1166 + * 5 32 1649 + * 6 64 2332 + * 7 128 3299 + * 8 256 4665 + * 9 512 6598 + * 10 1024 9331 + * + * However, the largest observed signature coefficients during our + * experiments was 1077 (in absolute value), hence we can assume that, + * with overwhelming probability, signature coefficients will fit + * in -2047..2047, i.e. 12 bits. 
+ */ + +const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[] = { + 0, /* unused */ + 10, + 11, + 11, + 12, + 12, + 12, + 12, + 12, + 12, + 12 +}; diff --git a/crypto_sign/falcon/falcon-1024/clean/common.c b/crypto_sign/falcon/falcon-1024/clean/common.c new file mode 100644 index 00000000..2e3005b2 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/common.c @@ -0,0 +1,294 @@ +#include "inner.h" + +/* + * Support functions for signatures (hash-to-point, norm). + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h */ +void +PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime( + inner_shake256_context *sc, + uint16_t *x, unsigned logn) { + /* + * This is the straightforward per-the-spec implementation. It + * is not constant-time, thus it might reveal information on the + * plaintext (at least, enough to check the plaintext against a + * list of potential plaintexts) in a scenario where the + * attacker does not have access to the signature value or to + * the public key, but knows the nonce (without knowledge of the + * nonce, the hashed output cannot be matched against potential + * plaintexts). + */ + size_t n; + + n = (size_t)1 << logn; + while (n > 0) { + uint8_t buf[2]; + uint32_t w; + + inner_shake256_extract(sc, (void *)buf, sizeof buf); + w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; + if (w < 61445) { + while (w >= 12289) { + w -= 12289; + } + *x ++ = (uint16_t)w; + n --; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct( + inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp) { + /* + * Each 16-bit sample is a value in 0..65535. The value is + * kept if it falls in 0..61444 (because 61445 = 5*12289) + * and rejected otherwise; thus, each sample has probability + * about 0.93758 of being selected. + * + * We want to oversample enough to be sure that we will + * have enough values with probability at least 1 - 2^(-256). + * Depending on degree N, this leads to the following + * required oversampling: + * + * logn n oversampling + * 1 2 65 + * 2 4 67 + * 3 8 71 + * 4 16 77 + * 5 32 86 + * 6 64 100 + * 7 128 122 + * 8 256 154 + * 9 512 205 + * 10 1024 287 + * + * If logn >= 7, then the provided temporary buffer is large + * enough. Otherwise, we use a stack buffer of 63 entries + * (i.e. 126 bytes) for the values that do not fit in tmp[]. 
+ */ + + static const uint16_t overtab[] = { + 0, /* unused */ + 65, + 67, + 71, + 77, + 86, + 100, + 122, + 154, + 205, + 287 + }; + + unsigned n, n2, u, m, p, over; + uint16_t *tt1, tt2[63]; + + /* + * We first generate m 16-bit value. Values 0..n-1 go to x[]. + * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. + * We also reduce modulo q the values; rejected values are set + * to 0xFFFF. + */ + n = 1U << logn; + n2 = n << 1; + over = overtab[logn]; + m = n + over; + tt1 = (uint16_t *)tmp; + for (u = 0; u < m; u ++) { + uint8_t buf[2]; + uint32_t w, wr; + + inner_shake256_extract(sc, buf, sizeof buf); + w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; + wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); + wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); + wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); + wr |= ((w - 61445) >> 31) - 1; + if (u < n) { + x[u] = (uint16_t)wr; + } else if (u < n2) { + tt1[u - n] = (uint16_t)wr; + } else { + tt2[u - n2] = (uint16_t)wr; + } + } + + /* + * Now we must "squeeze out" the invalid values. We do this in + * a logarithmic sequence of passes; each pass computes where a + * value should go, and moves it down by 'p' slots if necessary, + * where 'p' uses an increasing powers-of-two scale. It can be + * shown that in all cases where the loop decides that a value + * has to be moved down by p slots, the destination slot is + * "free" (i.e. contains an invalid value). + */ + for (p = 1; p <= over; p <<= 1) { + unsigned v; + + /* + * In the loop below: + * + * - v contains the index of the final destination of + * the value; it is recomputed dynamically based on + * whether values are valid or not. + * + * - u is the index of the value we consider ("source"); + * its address is s. + * + * - The loop may swap the value with the one at index + * u-p. The address of the swap destination is d. 
+ */ + v = 0; + for (u = 0; u < m; u ++) { + uint16_t *s, *d; + unsigned j, sv, dv, mk; + + if (u < n) { + s = &x[u]; + } else if (u < n2) { + s = &tt1[u - n]; + } else { + s = &tt2[u - n2]; + } + sv = *s; + + /* + * The value in sv should ultimately go to + * address v, i.e. jump back by u-v slots. + */ + j = u - v; + + /* + * We increment v for the next iteration, but + * only if the source value is valid. The mask + * 'mk' is -1 if the value is valid, 0 otherwise, + * so we _subtract_ mk. + */ + mk = (sv >> 15) - 1U; + v -= mk; + + /* + * In this loop we consider jumps by p slots; if + * u < p then there is nothing more to do. + */ + if (u < p) { + continue; + } + + /* + * Destination for the swap: value at address u-p. + */ + if ((u - p) < n) { + d = &x[u - p]; + } else if ((u - p) < n2) { + d = &tt1[(u - p) - n]; + } else { + d = &tt2[(u - p) - n2]; + } + dv = *d; + + /* + * The swap should be performed only if the source + * is valid AND the jump j has its 'p' bit set. + */ + mk &= -(((j & p) + 0x1FF) >> 9); + + *s = (uint16_t)(sv ^ (mk & (sv ^ dv))); + *d = (uint16_t)(dv ^ (mk & (sv ^ dv))); + } + } +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_CLEAN_is_short( + const int16_t *s1, const int16_t *s2, unsigned logn) { + /* + * We use the l2-norm. Code below uses only 32-bit operations to + * compute the square of the norm with saturation to 2^32-1 if + * the value exceeds 2^31-1. + */ + size_t n, u; + uint32_t s, ng; + + n = (size_t)1 << logn; + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = s1[u]; + s += (uint32_t)(z * z); + ng |= s; + z = s2[u]; + s += (uint32_t)(z * z); + ng |= s; + } + s |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). 
+ */ + return s < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_CLEAN_is_short_half( + uint32_t sqn, const int16_t *s2, unsigned logn) { + size_t n, u; + uint32_t ng; + + n = (size_t)1 << logn; + ng = -(sqn >> 31); + for (u = 0; u < n; u ++) { + int32_t z; + + z = s2[u]; + sqn += (uint32_t)(z * z); + ng |= sqn; + } + sqn |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). + */ + return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} diff --git a/crypto_sign/falcon/falcon-1024/clean/fft.c b/crypto_sign/falcon/falcon-1024/clean/fft.c new file mode 100644 index 00000000..a25bac4e --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/fft.c @@ -0,0 +1,700 @@ +#include "inner.h" + +/* + * FFT code. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* + * Rules for complex number macros: + * -------------------------------- + * + * Operand order is: destination, source1, source2... + * + * Each operand is a real and an imaginary part. + * + * All overlaps are allowed. + */ + +/* + * Addition of two complex numbers (d = a + b). + */ +#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_add(a_re, b_re); \ + fpct_im = fpr_add(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Subtraction of two complex numbers (d = a - b). + */ +#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_sub(a_re, b_re); \ + fpct_im = fpr_sub(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Multplication of two complex numbers (d = a * b). + */ +#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Squaring of a complex number (d = a * a). 
+ */ +#define FPC_SQR(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_d_re = fpr_sub(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_d_im = fpr_double(fpr_mul(fpct_a_re, fpct_a_im)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Inversion of a complex number (d = 1 / a). + */ +#define FPC_INV(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_d_re = fpr_mul(fpct_a_re, fpct_m); \ + fpct_d_im = fpr_mul(fpr_neg(fpct_a_im), fpct_m); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Division of complex numbers (d = a / b). + */ +#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_b_re), fpr_sqr(fpct_b_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_b_re = fpr_mul(fpct_b_re, fpct_m); \ + fpct_b_im = fpr_mul(fpr_neg(fpct_b_im), fpct_m); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the + * values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots + * of X^N+1 in the field of complex numbers. A crucial property is that + * w_{N-1-j} = conj(w_j) = 1/w_j for all j. + * + * FFT representation of a polynomial f (taken modulo X^N+1) is the + * set of values f(w_j). 
Since f is real, conj(f(w_j)) = f(conj(w_j)),
+ * thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values,
+ * for j = 0 to N/2-1; the other half can be recomputed easily when (if)
+ * needed. A consequence is that FFT representation has the same size
+ * as normal representation: N/2 complex numbers use N real numbers (each
+ * complex number is the combination of a real and an imaginary part).
+ *
+ * We use a specific ordering which makes computations easier. Let rev()
+ * be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we
+ * store the real and imaginary parts of f(w_j) in slots:
+ *
+ *    Re(f(w_j)) -> slot rev(j)/2
+ *    Im(f(w_j)) -> slot rev(j)/2+N/2
+ *
+ * (Note that rev(j) is even for j < N/2.)
+ */
+
+/* see inner.h; in-place forward FFT over the n = 2^logn slots of f[] */
+void
+PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn) {
+    /*
+     * FFT algorithm in bit-reversal order uses the following
+     * iterative algorithm:
+     *
+     *   t = N
+     *   for m = 1; m < N; m *= 2:
+     *       ht = t/2
+     *       for i1 = 0; i1 < m; i1 ++:
+     *           j1 = i1 * t
+     *           s = GM[m + i1]
+     *           for j = j1; j < (j1 + ht); j ++:
+     *               x = f[j]
+     *               y = s * f[j + ht]
+     *               f[j] = x + y
+     *               f[j + ht] = x - y
+     *       t = ht
+     *
+     * GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N).
+     *
+     * In the description above, f[] is supposed to contain complex
+     * numbers. In our in-memory representation, the real and
+     * imaginary parts of f[k] are in array slots k and k+N/2.
+     *
+     * We only keep the first half of the complex numbers. We can
+     * see that after the first iteration, the first and second halves
+     * of the array of complex numbers have separate lives, so we
+     * simply ignore the second part.
+     */
+
+    unsigned u;
+    size_t t, n, hn, m;
+
+    /*
+     * First iteration: compute f[j] + i * f[j+N/2] for all j < N/2
+     * (because GM[1] = w^rev(1) = w^(N/2) = i).
+     * In our chosen representation, this is a no-op: everything is
+     * already where it should be.
+     */
+
+    /*
+     * Subsequent iterations are truncated to use only the first
+     * half of values (hence hm = m/2 butterflies groups per level below).
+     */
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    t = hn;
+    for (u = 1, m = 2; u < logn; u ++, m <<= 1) {
+        size_t ht, hm, i1, j1;
+
+        ht = t >> 1;
+        hm = m >> 1;
+        for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) {
+            size_t j, j2;
+
+            j2 = j1 + ht;
+            fpr s_re, s_im;
+
+            s_re = fpr_gm_tab[((m + i1) << 1) + 0];
+            s_im = fpr_gm_tab[((m + i1) << 1) + 1];
+            for (j = j1; j < j2; j ++) {
+                fpr x_re, x_im, y_re, y_im;
+
+                x_re = f[j];
+                x_im = f[j + hn];
+                y_re = f[j + ht];
+                y_im = f[j + ht + hn];
+                FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im);
+                FPC_ADD(f[j], f[j + hn],
+                        x_re, x_im, y_re, y_im);
+                FPC_SUB(f[j + ht], f[j + ht + hn],
+                        x_re, x_im, y_re, y_im);
+            }
+        }
+        t = ht;
+    }
+}
+
+/* see inner.h; in-place inverse FFT over the n = 2^logn slots of f[] */
+void
+PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn) {
+    /*
+     * Inverse FFT algorithm in bit-reversal order uses the following
+     * iterative algorithm:
+     *
+     *   t = 1
+     *   for m = N; m > 1; m /= 2:
+     *       hm = m/2
+     *       dt = t*2
+     *       for i1 = 0; i1 < hm; i1 ++:
+     *           j1 = i1 * dt
+     *           s = iGM[hm + i1]
+     *           for j = j1; j < (j1 + t); j ++:
+     *               x = f[j]
+     *               y = f[j + t]
+     *               f[j] = x + y
+     *               f[j + t] = s * (x - y)
+     *       t = dt
+     *   for i1 = 0; i1 < N; i1 ++:
+     *       f[i1] = f[i1] / N
+     *
+     * iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N)
+     * (actually, iGM[k] = 1/GM[k] = conj(GM[k])).
+     *
+     * In the main loop (not counting the final division loop), in
+     * all iterations except the last, the first and second half of f[]
+     * (as an array of complex numbers) are separate. In our chosen
+     * representation, we do not keep the second half.
+     *
+     * The last iteration recombines the recomputed half with the
+     * implicit half, and should yield only real numbers since the
+     * target polynomial is real; moreover, s = i at that step.
+     * Thus, when considering x and y:
+     *    y = conj(x) since the final f[j] must be real
+     *    Therefore, f[j] is filled with 2*Re(x), and f[j + t] is
+     *    filled with 2*Im(x).
+     * But we already have Re(x) and Im(x) in array slots j and j+t
+     * in our chosen representation. That last iteration is thus a
+     * simple doubling of the values in the whole array.
+     *
+     * We make the last iteration a no-op by tweaking the final
+     * division into a division by N/2, not N.
+     */
+    size_t u, n, hn, t, m;
+
+    n = (size_t)1 << logn;
+    t = 1;
+    m = n;
+    hn = n >> 1;
+    for (u = logn; u > 1; u --) {
+        size_t hm, dt, i1, j1;
+
+        hm = m >> 1;
+        dt = t << 1;
+        for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) {
+            size_t j, j2;
+
+            j2 = j1 + t;
+            fpr s_re, s_im;
+
+            s_re = fpr_gm_tab[((hm + i1) << 1) + 0];
+            s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]);
+            for (j = j1; j < j2; j ++) {
+                fpr x_re, x_im, y_re, y_im;
+
+                x_re = f[j];
+                x_im = f[j + hn];
+                y_re = f[j + t];
+                y_im = f[j + t + hn];
+                FPC_ADD(f[j], f[j + hn],
+                        x_re, x_im, y_re, y_im);
+                FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im);
+                FPC_MUL(f[j + t], f[j + t + hn],
+                        x_re, x_im, s_re, s_im);
+            }
+        }
+        t = dt;
+        m = hm;
+    }
+
+    /*
+     * Last iteration is a no-op, provided that we divide by N/2
+     * instead of N. We need to make a special case for logn = 0.
+     */
+    if (logn > 0) {
+        fpr ni;
+
+        ni = fpr_p2_tab[logn];
+        for (u = 0; u < n; u ++) {
+            f[u] = fpr_mul(f[u], ni);
+        }
+    }
+}
+
+/* see inner.h; a[u] <- a[u] + b[u] for every slot u < 2^logn */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_add(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, u;
+
+    n = (size_t)1 << logn;
+    for (u = 0; u < n; u ++) {
+        a[u] = fpr_add(a[u], b[u]);
+    }
+}
+
+/* see inner.h; a[u] <- a[u] - b[u] for every slot u < 2^logn */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_sub(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, u;
+
+    n = (size_t)1 << logn;
+    for (u = 0; u < n; u ++) {
+        a[u] = fpr_sub(a[u], b[u]);
+    }
+}
+
+/* see inner.h; a[u] <- -a[u] for every slot u < 2^logn */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn) {
+    size_t n, u;
+
+    n = (size_t)1 << logn;
+    for (u = 0; u < n; u ++) {
+        a[u] = fpr_neg(a[u]);
+    }
+}
+
+/* see inner.h; negates the upper half of a[] (the imaginary parts), i.e. conjugates each value */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn) {
+    size_t n, u;
+
+    n = (size_t)1 << logn;
+    for (u = (n >> 1); u < n; u ++) {
+        a[u] = fpr_neg(a[u]);
+    }
+}
+
+/* see inner.h; a <- a * b, one complex product per index u < n/2 (re in slot u, im in slot u + n/2) */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr a_re, a_im, b_re, b_im;
+
+        a_re = a[u];
+        a_im = a[u + hn];
+        b_re = b[u];
+        b_im = b[u + hn];
+        FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im);
+    }
+}
+
+/* see inner.h; a <- a * conj(b): same as poly_mul_fft with the imaginary part of b negated */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr a_re, a_im, b_re, b_im;
+
+        a_re = a[u];
+        a_im = a[u + hn];
+        b_re = b[u];
+        b_im = fpr_neg(b[u + hn]);
+        FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im);
+    }
+}
+
+/* see inner.h; a <- a * conj(a): real slot gets re^2 + im^2, imaginary slot is set to fpr_zero */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) {
+    /*
+     * Since each coefficient is multiplied with its own conjugate,
+     * the result contains only real values.
+     */
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr a_re, a_im;
+
+        a_re = a[u];
+        a_im = a[u + hn];
+        a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im));
+        a[u + hn] = fpr_zero;
+    }
+}
+
+/* see inner.h; a[u] <- a[u] * x for every slot u < 2^logn */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) {
+    size_t n, u;
+
+    n = (size_t)1 << logn;
+    for (u = 0; u < n; u ++) {
+        a[u] = fpr_mul(a[u], x);
+    }
+}
+
+/* see inner.h; a <- a / b, one complex division (FPC_DIV) per index u < n/2 */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_div_fft(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr a_re, a_im, b_re, b_im;
+
+        a_re = a[u];
+        a_im = a[u + hn];
+        b_re = b[u];
+        b_im = b[u + hn];
+        FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im);
+    }
+}
+
+/* see inner.h; d[u] <- 1 / (|a_u|^2 + |b_u|^2) for u < n/2; only the lower half of d[] is written */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d,
+        const fpr *a, const fpr *b, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr a_re, a_im;
+        fpr b_re, b_im;
+
+        a_re = a[u];
+        a_im = a[u + hn];
+        b_re = b[u];
+        b_im = b[u + hn];
+        d[u] = fpr_inv(fpr_add(
+                           fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)),
+                           fpr_add(fpr_sqr(b_re), fpr_sqr(b_im))));
+    }
+}
+
+/* see inner.h; d <- F * conj(f) + G * conj(g), computed per complex index u < n/2 */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d,
+        const fpr *F, const fpr *G,
+        const fpr *f, const fpr *g, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr F_re, F_im, G_re, G_im;
+        fpr f_re, f_im, g_re, g_im;
+        fpr a_re, a_im, b_re, b_im;
+
+        F_re = F[u];
+        F_im = F[u + hn];
+        G_re = G[u];
+        G_im = G[u + hn];
+        f_re = f[u];
+        f_im = f[u + hn];
+        g_re = g[u];
+        g_im = g[u + hn];
+
+        FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im));
+        FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im));
+        d[u] = fpr_add(a_re, b_re);
+        d[u + hn] = fpr_add(a_im, b_im);
+    }
+}
+
+/* see inner.h; scales both parts of a by the real slot b[u]; the upper half of b[] is never read */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        a[u] = fpr_mul(a[u], b[u]);
+        a[u + hn] = fpr_mul(a[u + hn], b[u]);
+    }
+}
+
+/* see inner.h; divides both parts of a by the real slot b[u]; the upper half of b[] is never read */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(
+    fpr *a, const fpr *b, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr ib;
+
+        ib = fpr_inv(b[u]);
+        a[u] = fpr_mul(a[u], ib);
+        a[u + hn] = fpr_mul(a[u + hn], ib);
+    }
+}
+
+/* see inner.h; in place: mu = g01 / g00, g11 <- g11 - mu * conj(g01), g01 <- conj(mu) */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(
+    const fpr *g00,
+    fpr *g01, fpr *g11, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im;
+        fpr mu_re, mu_im;
+
+        g00_re = g00[u];
+        g00_im = g00[u + hn];
+        g01_re = g01[u];
+        g01_im = g01[u + hn];
+        g11_re = g11[u];
+        g11_im = g11[u + hn];
+        FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im);
+        FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im));
+        FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im);
+        g01[u] = mu_re;
+        g01[u + hn] = fpr_neg(mu_im);
+    }
+}
+
+/* see inner.h; same computation as poly_LDL_fft, but results go to d11 and l10; inputs are not modified */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(
+    fpr *d11, fpr *l10,
+    const fpr *g00, const fpr *g01,
+    const fpr *g11, unsigned logn) {
+    size_t n, hn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    for (u = 0; u < hn; u ++) {
+        fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im;
+        fpr mu_re, mu_im;
+
+        g00_re = g00[u];
+        g00_im = g00[u + hn];
+        g01_re = g01[u];
+        g01_im = g01[u + hn];
+        g11_re = g11[u];
+        g11_im = g11[u + hn];
+        FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im);
+        FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im));
+        FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im);
+        l10[u] = mu_re;
+        l10[u + hn] = fpr_neg(mu_im);
+    }
+}
+
+/* see inner.h; splits f (2^logn slots) into half-size f0 and f1, both in FFT representation */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_split_fft(
+    fpr *f0, fpr *f1,
+    const fpr *f, unsigned logn) {
+    /*
+     * The FFT representation we use is in bit-reversed order
+     * (element i contains f(w^(rev(i))), where rev() is the
+     * bit-reversal function over the ring degree). This changes
+     * indexes with regards to the Falcon specification.
+     */
+    size_t n, hn, qn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    qn = hn >> 1;
+
+    /*
+     * We process complex values by pairs. For logn = 1, there is only
+     * one complex value (the other one is the implicit conjugate),
+     * so we add the two lines below because the loop will be
+     * skipped.
+     */
+    f0[0] = f[0];
+    f1[0] = f[hn];
+
+    for (u = 0; u < qn; u ++) {
+        fpr a_re, a_im, b_re, b_im;
+        fpr t_re, t_im;
+
+        a_re = f[(u << 1) + 0];
+        a_im = f[(u << 1) + 0 + hn];
+        b_re = f[(u << 1) + 1];
+        b_im = f[(u << 1) + 1 + hn];
+
+        FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im);
+        f0[u] = fpr_half(t_re);
+        f0[u + qn] = fpr_half(t_im);
+
+        FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im);
+        FPC_MUL(t_re, t_im, t_re, t_im,
+                fpr_gm_tab[((u + hn) << 1) + 0],
+                fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1]));
+        f1[u] = fpr_half(t_re);
+        f1[u + qn] = fpr_half(t_im);
+    }
+}
+
+/* see inner.h; rebuilds f (2^logn slots) from half-size f0 and f1; mirrors the steps of poly_split_fft */
+void
+PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(
+    fpr *f,
+    const fpr *f0, const fpr *f1, unsigned logn) {
+    size_t n, hn, qn, u;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    qn = hn >> 1;
+
+    /*
+     * An extra copy to handle the special case logn = 1 (the loop below is then skipped).
+     */
+    f[0] = f0[0];
+    f[hn] = f1[0];
+
+    for (u = 0; u < qn; u ++) {
+        fpr a_re, a_im, b_re, b_im;
+        fpr t_re, t_im;
+
+        a_re = f0[u];
+        a_im = f0[u + qn];
+        FPC_MUL(b_re, b_im, f1[u], f1[u + qn],
+                fpr_gm_tab[((u + hn) << 1) + 0],
+                fpr_gm_tab[((u + hn) << 1) + 1]);
+        FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im);
+        f[(u << 1) + 0] = t_re;
+        f[(u << 1) + 0 + hn] = t_im;
+        FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im);
+        f[(u << 1) + 1] = t_re;
+        f[(u << 1) + 1 + hn] = t_im;
+    }
+}
diff --git a/crypto_sign/falcon/falcon-1024/clean/fpr.c b/crypto_sign/falcon/falcon-1024/clean/fpr.c
new file mode 100644
index 00000000..091462a7
--- /dev/null
+++ b/crypto_sign/falcon/falcon-1024/clean/fpr.c
@@ -0,0 +1,1634 @@
+#include "inner.h"
+
+/*
+ * Floating-point operations.
+ *
+ * This file implements the non-inline functions declared in
+ * fpr.h, as well as the constants for FFT / iFFT.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2017-2019 Falcon Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author Thomas Pornin
+ */
+
+
+
+/*
+ * Normalize a provided unsigned integer to the 2^63..2^64-1 range by
+ * left-shifting it if necessary. The exponent e is adjusted accordingly
+ * (i.e. if the value was left-shifted by n bits, then n is subtracted
+ * from e). If source m is 0, then it remains 0, but e is altered.
+ * Both m and e must be simple variables (no expressions allowed).
+ */
+#define FPR_NORM64(m, e)   do { \
+        uint32_t nt; \
+        \
+        (e) -= 63; \
+        \
+        nt = (uint32_t)((m) >> 32); \
+        nt = (nt | -nt) >> 31; \
+        (m) ^= ((m) ^ ((m) << 32)) & ((uint64_t)nt - 1); \
+        (e) += (int)(nt << 5); \
+        \
+        nt = (uint32_t)((m) >> 48); \
+        nt = (nt | -nt) >> 31; \
+        (m) ^= ((m) ^ ((m) << 16)) & ((uint64_t)nt - 1); \
+        (e) += (int)(nt << 4); \
+        \
+        nt = (uint32_t)((m) >> 56); \
+        nt = (nt | -nt) >> 31; \
+        (m) ^= ((m) ^ ((m) <<  8)) & ((uint64_t)nt - 1); \
+        (e) += (int)(nt << 3); \
+        \
+        nt = (uint32_t)((m) >> 60); \
+        nt = (nt | -nt) >> 31; \
+        (m) ^= ((m) ^ ((m) <<  4)) & ((uint64_t)nt - 1); \
+        (e) += (int)(nt << 2); \
+        \
+        nt = (uint32_t)((m) >> 62); \
+        nt = (nt | -nt) >> 31; \
+        (m) ^= ((m) ^ ((m) <<  2)) & ((uint64_t)nt - 1); \
+        (e) += (int)(nt << 1); \
+        \
+        nt = (uint32_t)((m) >> 63); \
+        (m) ^= ((m) ^ ((m) <<  1)) & ((uint64_t)nt - 1); \
+        (e) += (int)(nt); \
+    } while (0)
+
+
+fpr
+fpr_scaled(int64_t i, int sc) {
+    /*
+     * To convert from int to float, we have to do the following:
+     *  1. Get the absolute value of the input, and its sign
+     *  2. Shift right or left the value as appropriate
+     *  3. Pack the result
+     *
+     * We can assume that the source integer is not -2^63.
+     */
+    int s, e;
+    uint32_t t;
+    uint64_t m;
+
+    /*
+     * Extract sign bit, then replace i with its absolute value.
+     * We have: -i = 1 + ~i
+     */
+    s = (int)((uint64_t)i >> 63);
+    i ^= -(int64_t)s;
+    i += s;
+
+    /*
+     * For now we suppose that i != 0 (the case i = 0 is fixed up below).
+     * We set m to i (now the absolute value) and left-shift it as much as needed
+     * to get a 1 in the top bit. We can do that in a logarithmic
+     * number of conditional shifts.
+     */
+    m = (uint64_t)i;
+    e = 9 + sc;
+    FPR_NORM64(m, e);
+
+    /*
+     * Now m is in the 2^63..2^64-1 range. We must divide it by 512;
+     * if one of the dropped bits is a 1, this should go into the
+     * "sticky bit".
+     */
+    m |= ((uint32_t)m & 0x1FF) + 0x1FF;
+    m >>= 9;
+
+    /*
+     * Corrective action: if i = 0 then all of the above was
+     * incorrect, and we clamp e and m down to zero.
+     */
+    t = (uint32_t)((uint64_t)(i | -i) >> 63);
+    m &= -(uint64_t)t;
+    e &= -(int)t;
+
+    /*
+     * Assemble back everything. The FPR() function will handle cases
+     * where e is too low.
+     */
+    return FPR(s, e, m);
+}
+
+
+
+fpr
+fpr_add(fpr x, fpr y) {
+    uint64_t m, xu, yu, za;
+    uint32_t cs;
+    int ex, ey, sx, sy, cc;
+
+    /*
+     * Make sure that the first operand (x) has the larger absolute
+     * value. This guarantees that the exponent of y is less than
+     * or equal to the exponent of x, and, if they are equal, then
+     * the mantissa of y will not be greater than the mantissa of x.
+     *
+     * After this swap, the result will have the sign x, except in
+     * the following edge case: abs(x) = abs(y), and x and y have
+     * opposite sign bits; in that case, the result shall be +0
+     * even if the sign bit of x is 1. To handle this case properly,
+     * we do the swap if abs(x) = abs(y) AND the sign of x is 1.
+     */
+    m = ((uint64_t)1 << 63) - 1;
+    za = (x & m) - (y & m);
+    cs = (uint32_t)(za >> 63)
+         | ((1U - (uint32_t)(-za >> 63)) & (uint32_t)(x >> 63));
+    m = (x ^ y) & -(uint64_t)cs;
+    x ^= m;
+    y ^= m;
+
+    /*
+     * Extract sign bits, exponents and mantissas. The mantissas are
+     * scaled up to 2^55..2^56-1, and the exponent is unbiased. If
+     * an operand is zero, its mantissa is set to 0 at this step, and
+     * its exponent will be -1078.
+     */
+    ex = (int)(x >> 52);
+    sx = ex >> 11;
+    ex &= 0x7FF;
+    m = (uint64_t)(uint32_t)((ex + 0x7FF) >> 11) << 52;
+    xu = ((x & (((uint64_t)1 << 52) - 1)) | m) << 3;
+    ex -= 1078;
+    ey = (int)(y >> 52);
+    sy = ey >> 11;
+    ey &= 0x7FF;
+    m = (uint64_t)(uint32_t)((ey + 0x7FF) >> 11) << 52;
+    yu = ((y & (((uint64_t)1 << 52) - 1)) | m) << 3;
+    ey -= 1078;
+
+    /*
+     * x has the larger exponent; hence, we only need to right-shift y.
+     * If the shift count is larger than 59 bits then we clamp the
+     * value to zero.
+     */
+    cc = ex - ey;
+    yu &= -(uint64_t)((uint32_t)(cc - 60) >> 31);
+    cc &= 63;
+
+    /*
+     * The lowest bit of yu is "sticky".
+     */
+    m = fpr_ulsh(1, cc) - 1;
+    yu |= (yu & m) + m;
+    yu = fpr_ursh(yu, cc);
+
+    /*
+     * If the operands have the same sign, then we add the mantissas;
+     * otherwise, we subtract the mantissas.
+     */
+    xu += yu - ((yu << 1) & -(uint64_t)(sx ^ sy));
+
+    /*
+     * The result may be smaller, or slightly larger. We normalize
+     * it to the 2^63..2^64-1 range (if xu is zero, then it stays
+     * at zero).
+     */
+    FPR_NORM64(xu, ex);
+
+    /*
+     * Scale down the value to 2^54..2^55-1, handling the last bit
+     * as sticky.
+     */
+    xu |= ((uint32_t)xu & 0x1FF) + 0x1FF;
+    xu >>= 9;
+    ex += 9;
+
+    /*
+     * In general, the result has the sign of x. However, if the
+     * result is exactly zero, then the following situations may
+     * be encountered:
+     *   x > 0, y = -x   -> result should be +0
+     *   x < 0, y = -x   -> result should be +0
+     *   x = +0, y = +0  -> result should be +0
+     *   x = -0, y = +0  -> result should be +0
+     *   x = +0, y = -0  -> result should be +0
+     *   x = -0, y = -0  -> result should be -0
+     *
+     * But at the conditional swap step at the start of the
+     * function, we ensured that if abs(x) = abs(y) and the
+     * sign of x was 1, then x and y were swapped. Thus, the
+     * two following cases cannot actually happen:
+     *   x < 0, y = -x
+     *   x = -0, y = +0
+     * In all other cases, the sign bit of x is conserved, which
+     * is what the FPR() function does. The FPR() function also
+     * properly clamps values to zero when the exponent is too
+     * low, but does not alter the sign in that case.
+     */
+    return FPR(sx, ex, xu);
+}
+
+
+
+fpr
+fpr_mul(fpr x, fpr y) {
+    uint64_t xu, yu, w, zu, zv;
+    uint32_t x0, x1, y0, y1, z0, z1, z2;
+    int ex, ey, d, e, s;
+
+    /*
+     * Extract absolute values as scaled unsigned integers. We
+     * don't extract exponents yet.
+     */
+    xu = (x & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52);
+    yu = (y & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52);
+
+    /*
+     * We have two 53-bit integers to multiply; we need to split
+     * each into a lower half and an upper half. Moreover, we
+     * prefer to have lower halves to be of 25 bits each, for
+     * reasons explained later on.
+     */
+    x0 = (uint32_t)xu & 0x01FFFFFF;
+    x1 = (uint32_t)(xu >> 25);
+    y0 = (uint32_t)yu & 0x01FFFFFF;
+    y1 = (uint32_t)(yu >> 25);
+    w = (uint64_t)x0 * (uint64_t)y0;
+    z0 = (uint32_t)w & 0x01FFFFFF;
+    z1 = (uint32_t)(w >> 25);
+    w = (uint64_t)x0 * (uint64_t)y1;
+    z1 += (uint32_t)w & 0x01FFFFFF;
+    z2 = (uint32_t)(w >> 25);
+    w = (uint64_t)x1 * (uint64_t)y0;
+    z1 += (uint32_t)w & 0x01FFFFFF;
+    z2 += (uint32_t)(w >> 25);
+    zu = (uint64_t)x1 * (uint64_t)y1;
+    z2 += (z1 >> 25);
+    z1 &= 0x01FFFFFF;
+    zu += z2;
+
+    /*
+     * Since xu and yu are both in the 2^52..2^53-1 range, the
+     * product is in the 2^104..2^106-1 range. We first reassemble
+     * it and round it into the 2^54..2^56-1 range; the bottom bit
+     * is made "sticky". Since the low limbs z0 and z1 are 25 bits
+     * each, we just take the upper part (zu), and consider z0 and
+     * z1 only for purposes of stickiness.
+     * (This is the reason why we chose 25-bit limbs above.)
+     */
+    zu |= ((z0 | z1) + 0x01FFFFFF) >> 25;
+
+    /*
+     * We normalize zu to the 2^54..2^55-1 range: it could be one
+     * bit too large at this point. This is done with a conditional
+     * right-shift that takes into account the sticky bit.
+     */
+    zv = (zu >> 1) | (zu & 1);
+    w = zu >> 55;
+    zu ^= (zu ^ zv) & -w;
+
+    /*
+     * Get the aggregate scaling factor:
+     *
+     *   - Each exponent is biased by 1023.
+     *
+     *   - Integral mantissas are scaled by 2^52, hence an
+     *     extra 52 bias for each exponent.
+     *
+     *   - However, we right-shifted z by 50 bits, and then
+     *     by 0 or 1 extra bit (depending on the value of w).
+     *
+     * In total, we must add the exponents, then subtract
+     * 2 * (1023 + 52), then add 50 + w (net constant: -2100).
+     */
+    ex = (int)((x >> 52) & 0x7FF);
+    ey = (int)((y >> 52) & 0x7FF);
+    e = ex + ey - 2100 + (int)w;
+
+    /*
+     * Sign bit is the XOR of the operand sign bits.
+     */
+    s = (int)((x ^ y) >> 63);
+
+    /*
+     * Corrective actions for zeros: if either of the operands is
+     * zero, then the computations above were wrong. Test for zero
+     * is whether ex or ey is zero. We just have to set the mantissa
+     * (zu) to zero, the FPR() function will normalize e.
+     */
+    d = ((ex + 0x7FF) & (ey + 0x7FF)) >> 11;
+    zu &= -(uint64_t)d;
+
+    /*
+     * FPR() packs the result and applies proper rounding.
+     */
+    return FPR(s, e, zu);
+}
+
+
+
+fpr
+fpr_div(fpr x, fpr y) {
+    uint64_t xu, yu, q, q2, w;
+    int i, ex, ey, e, d, s;
+
+    /*
+     * Extract mantissas of x and y (unsigned).
+     */
+    xu = (x & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52);
+    yu = (y & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52);
+
+    /*
+     * Perform bit-by-bit division of xu by yu. We run it for 55 bits.
+     */
+    q = 0;
+    for (i = 0; i < 55; i ++) {
+        /*
+         * If yu is less than or equal to xu, then subtract it and
+         * push a 1 in the quotient; otherwise, leave xu unchanged
+         * and push a 0.
+         */
+        uint64_t b;
+
+        b = ((xu - yu) >> 63) - 1;
+        xu -= b & yu;
+        q |= b & 1;
+        xu <<= 1;
+        q <<= 1;
+    }
+
+    /*
+     * We got 55 bits in the quotient, followed by an extra zero. We
+     * want that 56th bit to be "sticky": it should be a 1 if and
+     * only if the remainder (xu) is non-zero.
+     */
+    q |= (xu | -xu) >> 63;
+
+    /*
+     * Quotient is at most 2^56-1. Its top bit may be zero, but in
+     * that case the next-to-top bit will be a one, since the
+     * initial xu and yu were both in the 2^52..2^53-1 range.
+     * We perform a conditional shift to normalize q to the
+     * 2^54..2^55-1 range (with the bottom bit being sticky).
+     */
+    q2 = (q >> 1) | (q & 1);
+    w = q >> 55;
+    q ^= (q ^ q2) & -w;
+
+    /*
+     * Extract exponents to compute the scaling factor:
+     *
+     *   - Each exponent is biased and we scaled them up by
+     *     52 bits; but these biases will cancel out.
+     *
+     *   - The division loop produced a 55-bit shifted result,
+     *     so we must scale it down by 55 bits.
+     *
+     *   - If w = 1, we right-shifted the integer by 1 bit,
+     *     hence we must add 1 to the scaling.
+     */
+    ex = (int)((x >> 52) & 0x7FF);
+    ey = (int)((y >> 52) & 0x7FF);
+    e = ex - ey - 55 + (int)w;
+
+    /*
+     * Sign is the XOR of the signs of the operands.
+     */
+    s = (int)((x ^ y) >> 63);
+
+    /*
+     * Corrective actions for zeros: if x = 0, then the computation
+     * is wrong, and we must clamp e and q to 0. We do not care
+     * about the case y = 0 (as per assumptions in this module,
+     * the caller does not perform divisions by zero).
+     */
+    d = (ex + 0x7FF) >> 11;
+    s &= d;
+    e &= -d;
+    q &= -(uint64_t)d;
+
+    /*
+     * FPR() packs the result and applies proper rounding.
+     */
+    return FPR(s, e, q);
+}
+
+
+
+fpr
+fpr_sqrt(fpr x) {
+    uint64_t xu, q, s, r;
+    int ex, e;
+
+    /*
+     * Extract the mantissa and the exponent. We don't care about
+     * the sign: by assumption, the operand is nonnegative.
+     * We want the "true" exponent corresponding to a mantissa
+     * in the 1..2 range.
+     */
+    xu = (x & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52);
+    ex = (int)((x >> 52) & 0x7FF);
+    e = ex - 1023;
+
+    /*
+     * If the exponent is odd, double the mantissa and decrement
+     * the exponent. The exponent is then halved to account for
+     * the square root.
+     */
+    xu += xu & -(uint64_t)(e & 1);
+    e >>= 1;
+
+    /*
+     * Double the mantissa.
+     */
+    xu <<= 1;
+
+    /*
+     * We now have a mantissa in the 2^53..2^55-1 range. It
+     * represents a value between 1 (inclusive) and 4 (exclusive)
+     * in fixed point notation (with 53 fractional bits). We
+     * compute the square root bit by bit.
+     */
+    q = 0;
+    s = 0;
+    r = (uint64_t)1 << 53;
+    for (int i = 0; i < 54; i ++) {
+        uint64_t t, b;
+
+        t = s + r;
+        b = ((xu - t) >> 63) - 1;
+        s += (r << 1) & b;
+        xu -= t & b;
+        q += r & b;
+        xu <<= 1;
+        r >>= 1;
+    }
+
+    /*
+     * Now, q is a rounded-low 54-bit value, with a leading 1,
+     * 52 fractional digits, and an additional guard bit. We add
+     * an extra sticky bit to account for what remains of the operand.
+     */
+    q <<= 1;
+    q |= (xu | -xu) >> 63;
+
+    /*
+     * Result q is in the 2^54..2^55-1 range; we bias the exponent
+     * by 54 bits (the value e at that point contains the "true"
+     * exponent, but q is now considered an integer, i.e. scaled
+     * up).
+     */
+    e -= 54;
+
+    /*
+     * Corrective action for an operand of value zero.
+     */
+    q &= -(uint64_t)((ex + 0x7FF) >> 11);
+
+    /*
+     * Apply rounding and pack the result.
+     */
+    return FPR(0, e, q);
+}
+
+
+uint64_t
+fpr_expm_p63(fpr x, fpr ccs) {
+    /*
+     * Polynomial approximation of exp(-x) is taken from FACCT:
+     *   https://eprint.iacr.org/2018/1234
+     * Specifically, values are extracted from the implementation
+     * referenced from the FACCT article, and available at:
+     *   https://github.com/raykzhao/gaussian
+     * Here, the coefficients have been scaled up by 2^63 and
+     * converted to integers.
+     *
+     * Tests over more than 24 billion random inputs in the
+     * 0..log(2) range have never shown a deviation larger than
+     * 2^(-50) from the true mathematical value.
+     */
+    static const uint64_t C[] = {
+        0x00000004741183A3u,
+        0x00000036548CFC06u,
+        0x0000024FDCBF140Au,
+        0x0000171D939DE045u,
+        0x0000D00CF58F6F84u,
+        0x000680681CF796E3u,
+        0x002D82D8305B0FEAu,
+        0x011111110E066FD0u,
+        0x0555555555070F00u,
+        0x155555555581FF00u,
+        0x400000000002B400u,
+        0x7FFFFFFFFFFF4800u,
+        0x8000000000000000u
+    };
+
+    uint64_t z, y;
+    size_t u;
+    uint32_t z0, z1, y0, y1;
+    uint64_t a, b;
+
+    y = C[0];
+    z = (uint64_t)fpr_trunc(fpr_mul(x, fpr_ptwo63)) << 1;
+    for (u = 1; u < (sizeof C) / sizeof(C[0]); u ++) {
+        /*
+         * Compute product z * y over 128 bits, but keep only
+         * the top 64 bits.
+         *
+         * TODO: On some architectures/compilers we could use
+         * some intrinsics (__umulh() on MSVC) or other compiler
+         * extensions (unsigned __int128 on GCC / Clang) for
+         * improved speed; however, most 64-bit architectures
+         * also have appropriate IEEE754 floating-point support,
+         * which is better.
+         */
+        uint64_t c;
+
+        z0 = (uint32_t)z;
+        z1 = (uint32_t)(z >> 32);
+        y0 = (uint32_t)y;
+        y1 = (uint32_t)(y >> 32);
+        a = ((uint64_t)z0 * (uint64_t)y1) + (((uint64_t)z0 * (uint64_t)y0) >> 32);
+        b = ((uint64_t)z1 * (uint64_t)y0);
+        c = (a >> 32) + (b >> 32);
+        c += (((uint64_t)(uint32_t)a + (uint64_t)(uint32_t)b) >> 32);
+        c += (uint64_t)z1 * (uint64_t)y1;
+        y = C[u] - c;
+    }
+
+    /*
+     * The scaling factor must be applied at the end. Since y is now
+     * in fixed-point notation, we have to convert the factor to the
+     * same format, and do an extra integer multiplication.
+     */
+    z = (uint64_t)fpr_trunc(fpr_mul(ccs, fpr_ptwo63)) << 1;
+    z0 = (uint32_t)z;
+    z1 = (uint32_t)(z >> 32);
+    y0 = (uint32_t)y;
+    y1 = (uint32_t)(y >> 32);
+    a = ((uint64_t)z0 * (uint64_t)y1) + (((uint64_t)z0 * (uint64_t)y0) >> 32);
+    b = ((uint64_t)z1 * (uint64_t)y0);
+    y = (a >> 32) + (b >> 32);
+    y += (((uint64_t)(uint32_t)a + (uint64_t)(uint32_t)b) >> 32);
+    y += (uint64_t)z1 * (uint64_t)y1;
+
+    return y;
+}
+
+const fpr fpr_gm_tab[] = {
+    0, 0,
+    9223372036854775808U, 4607182418800017408U,
+    4604544271217802189U, 4604544271217802189U,
+    13827916308072577997U, 4604544271217802189U,
+    4606496786581982534U, 4600565431771507043U,
+    13823937468626282851U, 4606496786581982534U,
+    4600565431771507043U, 4606496786581982534U,
+    13829868823436758342U, 4600565431771507043U,
+    4607009347991985328U, 4596196889902818827U,
+    13819568926757594635U, 4607009347991985328U,
+    4603179351334086856U, 4605664432017547683U,
+    13829036468872323491U, 4603179351334086856U,
+    4605664432017547683U, 4603179351334086856U,
+    13826551388188862664U, 4605664432017547683U,
+    4596196889902818827U, 4607009347991985328U,
+    13830381384846761136U, 4596196889902818827U,
+    4607139046673687846U, 4591727299969791020U,
+    13815099336824566828U, 4607139046673687846U,
+    4603889326261607894U, 4605137878724712257U,
+    13828509915579488065U, 4603889326261607894U,
+    4606118860100255153U, 4602163548591158843U,
+    13825535585445934651U, 4606118860100255153U,
+    4598900923775164166U, 4606794571824115162U,
+    13830166608678890970U, 4598900923775164166U,
+    4606794571824115162U, 4598900923775164166U,
+    13822272960629939974U, 4606794571824115162U,
+    4602163548591158843U, 4606118860100255153U,
+    13829490896955030961U, 4602163548591158843U,
+    4605137878724712257U, 4603889326261607894U,
+    13827261363116383702U, 4605137878724712257U,
+    4591727299969791020U, 4607139046673687846U,
+    13830511083528463654U, 4591727299969791020U,
+    4607171569234046334U, 4587232218149935124U,
+    13810604255004710932U, 4607171569234046334U, +
4604224084862889120U, 4604849113969373103U, + 13828221150824148911U, 4604224084862889120U, + 4606317631232591731U, 4601373767755717824U, + 13824745804610493632U, 4606317631232591731U, + 4599740487990714333U, 4606655894547498725U, + 13830027931402274533U, 4599740487990714333U, + 4606912484326125783U, 4597922303871901467U, + 13821294340726677275U, 4606912484326125783U, + 4602805845399633902U, 4605900952042040894U, + 13829272988896816702U, 4602805845399633902U, + 4605409869824231233U, 4603540801876750389U, + 13826912838731526197U, 4605409869824231233U, + 4594454542771183930U, 4607084929468638487U, + 13830456966323414295U, 4594454542771183930U, + 4607084929468638487U, 4594454542771183930U, + 13817826579625959738U, 4607084929468638487U, + 4603540801876750389U, 4605409869824231233U, + 13828781906679007041U, 4603540801876750389U, + 4605900952042040894U, 4602805845399633902U, + 13826177882254409710U, 4605900952042040894U, + 4597922303871901467U, 4606912484326125783U, + 13830284521180901591U, 4597922303871901467U, + 4606655894547498725U, 4599740487990714333U, + 13823112524845490141U, 4606655894547498725U, + 4601373767755717824U, 4606317631232591731U, + 13829689668087367539U, 4601373767755717824U, + 4604849113969373103U, 4604224084862889120U, + 13827596121717664928U, 4604849113969373103U, + 4587232218149935124U, 4607171569234046334U, + 13830543606088822142U, 4587232218149935124U, + 4607179706000002317U, 4582730748936808062U, + 13806102785791583870U, 4607179706000002317U, + 4604386048625945823U, 4604698657331085206U, + 13828070694185861014U, 4604386048625945823U, + 4606409688975526202U, 4600971798440897930U, + 13824343835295673738U, 4606409688975526202U, + 4600154912527631775U, 4606578871587619388U, + 13829950908442395196U, 4600154912527631775U, + 4606963563043808649U, 4597061974398750563U, + 13820434011253526371U, 4606963563043808649U, + 4602994049708411683U, 4605784983948558848U, + 13829157020803334656U, 4602994049708411683U, + 4605539368864982914U, 4603361638657888991U, + 
13826733675512664799U, 4605539368864982914U, + 4595327571478659014U, 4607049811591515049U, + 13830421848446290857U, 4595327571478659014U, + 4607114680469659603U, 4593485039402578702U, + 13816857076257354510U, 4607114680469659603U, + 4603716733069447353U, 4605276012900672507U, + 13828648049755448315U, 4603716733069447353U, + 4606012266443150634U, 4602550884377336506U, + 13825922921232112314U, 4606012266443150634U, + 4598476289818621559U, 4606856142606846307U, + 13830228179461622115U, 4598476289818621559U, + 4606727809065869586U, 4599322407794599425U, + 13822694444649375233U, 4606727809065869586U, + 4601771097584682078U, 4606220668805321205U, + 13829592705660097013U, 4601771097584682078U, + 4604995550503212910U, 4604058477489546729U, + 13827430514344322537U, 4604995550503212910U, + 4589965306122607094U, 4607158013403433018U, + 13830530050258208826U, 4589965306122607094U, + 4607158013403433018U, 4589965306122607094U, + 13813337342977382902U, 4607158013403433018U, + 4604058477489546729U, 4604995550503212910U, + 13828367587357988718U, 4604058477489546729U, + 4606220668805321205U, 4601771097584682078U, + 13825143134439457886U, 4606220668805321205U, + 4599322407794599425U, 4606727809065869586U, + 13830099845920645394U, 4599322407794599425U, + 4606856142606846307U, 4598476289818621559U, + 13821848326673397367U, 4606856142606846307U, + 4602550884377336506U, 4606012266443150634U, + 13829384303297926442U, 4602550884377336506U, + 4605276012900672507U, 4603716733069447353U, + 13827088769924223161U, 4605276012900672507U, + 4593485039402578702U, 4607114680469659603U, + 13830486717324435411U, 4593485039402578702U, + 4607049811591515049U, 4595327571478659014U, + 13818699608333434822U, 4607049811591515049U, + 4603361638657888991U, 4605539368864982914U, + 13828911405719758722U, 4603361638657888991U, + 4605784983948558848U, 4602994049708411683U, + 13826366086563187491U, 4605784983948558848U, + 4597061974398750563U, 4606963563043808649U, + 13830335599898584457U, 4597061974398750563U, + 
4606578871587619388U, 4600154912527631775U, + 13823526949382407583U, 4606578871587619388U, + 4600971798440897930U, 4606409688975526202U, + 13829781725830302010U, 4600971798440897930U, + 4604698657331085206U, 4604386048625945823U, + 13827758085480721631U, 4604698657331085206U, + 4582730748936808062U, 4607179706000002317U, + 13830551742854778125U, 4582730748936808062U, + 4607181740574479067U, 4578227681973159812U, + 13801599718827935620U, 4607181740574479067U, + 4604465633578481725U, 4604621949701367983U, + 13827993986556143791U, 4604465633578481725U, + 4606453861145241227U, 4600769149537129431U, + 13824141186391905239U, 4606453861145241227U, + 4600360675823176935U, 4606538458821337243U, + 13829910495676113051U, 4600360675823176935U, + 4606987119037722413U, 4596629994023683153U, + 13820002030878458961U, 4606987119037722413U, + 4603087070374583113U, 4605725276488455441U, + 13829097313343231249U, 4603087070374583113U, + 4605602459698789090U, 4603270878689749849U, + 13826642915544525657U, 4605602459698789090U, + 4595762727260045105U, 4607030246558998647U, + 13830402283413774455U, 4595762727260045105U, + 4607127537664763515U, 4592606767730311893U, + 13815978804585087701U, 4607127537664763515U, + 4603803453461190356U, 4605207475328619533U, + 13828579512183395341U, 4603803453461190356U, + 4606066157444814153U, 4602357870542944470U, + 13825729907397720278U, 4606066157444814153U, + 4598688984595225406U, 4606826008603986804U, + 13830198045458762612U, 4598688984595225406U, + 4606761837001494797U, 4599112075441176914U, + 13822484112295952722U, 4606761837001494797U, + 4601967947786150793U, 4606170366472647579U, + 13829542403327423387U, 4601967947786150793U, + 4605067233569943231U, 4603974338538572089U, + 13827346375393347897U, 4605067233569943231U, + 4590846768565625881U, 4607149205763218185U, + 13830521242617993993U, 4590846768565625881U, + 4607165468267934125U, 4588998070480937184U, + 13812370107335712992U, 4607165468267934125U, + 4604141730443515286U, 4604922840319727473U, + 
13828294877174503281U, 4604141730443515286U, + 4606269759522929756U, 4601573027631668967U, + 13824945064486444775U, 4606269759522929756U, + 4599531889160152938U, 4606692493141721470U, + 13830064529996497278U, 4599531889160152938U, + 4606884969294623682U, 4598262871476403630U, + 13821634908331179438U, 4606884969294623682U, + 4602710690099904183U, 4605957195211051218U, + 13829329232065827026U, 4602710690099904183U, + 4605343481119364930U, 4603629178146150899U, + 13827001215000926707U, 4605343481119364930U, + 4594016801320007031U, 4607100477024622401U, + 13830472513879398209U, 4594016801320007031U, + 4607068040143112603U, 4594891488091520602U, + 13818263524946296410U, 4607068040143112603U, + 4603451617570386922U, 4605475169017376660U, + 13828847205872152468U, 4603451617570386922U, + 4605843545406134034U, 4602900303344142735U, + 13826272340198918543U, 4605843545406134034U, + 4597492765973365521U, 4606938683557690074U, + 13830310720412465882U, 4597492765973365521U, + 4606618018794815019U, 4599948172872067014U, + 13823320209726842822U, 4606618018794815019U, + 4601173347964633034U, 4606364276725003740U, + 13829736313579779548U, 4601173347964633034U, + 4604774382555066977U, 4604305528345395596U, + 13827677565200171404U, 4604774382555066977U, + 4585465300892538317U, 4607176315382986589U, + 13830548352237762397U, 4585465300892538317U, + 4607176315382986589U, 4585465300892538317U, + 13808837337747314125U, 4607176315382986589U, + 4604305528345395596U, 4604774382555066977U, + 13828146419409842785U, 4604305528345395596U, + 4606364276725003740U, 4601173347964633034U, + 13824545384819408842U, 4606364276725003740U, + 4599948172872067014U, 4606618018794815019U, + 13829990055649590827U, 4599948172872067014U, + 4606938683557690074U, 4597492765973365521U, + 13820864802828141329U, 4606938683557690074U, + 4602900303344142735U, 4605843545406134034U, + 13829215582260909842U, 4602900303344142735U, + 4605475169017376660U, 4603451617570386922U, + 13826823654425162730U, 4605475169017376660U, + 
4594891488091520602U, 4607068040143112603U, + 13830440076997888411U, 4594891488091520602U, + 4607100477024622401U, 4594016801320007031U, + 13817388838174782839U, 4607100477024622401U, + 4603629178146150899U, 4605343481119364930U, + 13828715517974140738U, 4603629178146150899U, + 4605957195211051218U, 4602710690099904183U, + 13826082726954679991U, 4605957195211051218U, + 4598262871476403630U, 4606884969294623682U, + 13830257006149399490U, 4598262871476403630U, + 4606692493141721470U, 4599531889160152938U, + 13822903926014928746U, 4606692493141721470U, + 4601573027631668967U, 4606269759522929756U, + 13829641796377705564U, 4601573027631668967U, + 4604922840319727473U, 4604141730443515286U, + 13827513767298291094U, 4604922840319727473U, + 4588998070480937184U, 4607165468267934125U, + 13830537505122709933U, 4588998070480937184U, + 4607149205763218185U, 4590846768565625881U, + 13814218805420401689U, 4607149205763218185U, + 4603974338538572089U, 4605067233569943231U, + 13828439270424719039U, 4603974338538572089U, + 4606170366472647579U, 4601967947786150793U, + 13825339984640926601U, 4606170366472647579U, + 4599112075441176914U, 4606761837001494797U, + 13830133873856270605U, 4599112075441176914U, + 4606826008603986804U, 4598688984595225406U, + 13822061021450001214U, 4606826008603986804U, + 4602357870542944470U, 4606066157444814153U, + 13829438194299589961U, 4602357870542944470U, + 4605207475328619533U, 4603803453461190356U, + 13827175490315966164U, 4605207475328619533U, + 4592606767730311893U, 4607127537664763515U, + 13830499574519539323U, 4592606767730311893U, + 4607030246558998647U, 4595762727260045105U, + 13819134764114820913U, 4607030246558998647U, + 4603270878689749849U, 4605602459698789090U, + 13828974496553564898U, 4603270878689749849U, + 4605725276488455441U, 4603087070374583113U, + 13826459107229358921U, 4605725276488455441U, + 4596629994023683153U, 4606987119037722413U, + 13830359155892498221U, 4596629994023683153U, + 4606538458821337243U, 4600360675823176935U, + 
13823732712677952743U, 4606538458821337243U, + 4600769149537129431U, 4606453861145241227U, + 13829825898000017035U, 4600769149537129431U, + 4604621949701367983U, 4604465633578481725U, + 13827837670433257533U, 4604621949701367983U, + 4578227681973159812U, 4607181740574479067U, + 13830553777429254875U, 4578227681973159812U, + 4607182249242036882U, 4573724215515480177U, + 13797096252370255985U, 4607182249242036882U, + 4604505071555817232U, 4604583231088591477U, + 13827955267943367285U, 4604505071555817232U, + 4606475480113671417U, 4600667422348321968U, + 13824039459203097776U, 4606475480113671417U, + 4600463181646572228U, 4606517779747998088U, + 13829889816602773896U, 4600463181646572228U, + 4606998399608725124U, 4596413578358834022U, + 13819785615213609830U, 4606998399608725124U, + 4603133304188877240U, 4605694995810664660U, + 13829067032665440468U, 4603133304188877240U, + 4605633586259814045U, 4603225210076562971U, + 13826597246931338779U, 4605633586259814045U, + 4595979936813835462U, 4607019963775302583U, + 13830392000630078391U, 4595979936813835462U, + 4607133460805585796U, 4592167175087283203U, + 13815539211942059011U, 4607133460805585796U, + 4603846496621587377U, 4605172808754305228U, + 13828544845609081036U, 4603846496621587377U, + 4606092657816072624U, 4602260871257280788U, + 13825632908112056596U, 4606092657816072624U, + 4598795050632330097U, 4606810452769876110U, + 13830182489624651918U, 4598795050632330097U, + 4606778366364612594U, 4599006600037663623U, + 13822378636892439431U, 4606778366364612594U, + 4602065906208722008U, 4606144763310860551U, + 13829516800165636359U, 4602065906208722008U, + 4605102686554936490U, 4603931940768740167U, + 13827303977623515975U, 4605102686554936490U, + 4591287158938884897U, 4607144295058764886U, + 13830516331913540694U, 4591287158938884897U, + 4607168688050493276U, 4588115294056142819U, + 13811487330910918627U, 4607168688050493276U, + 4604183020748362039U, 4604886103475043762U, + 13828258140329819570U, 4604183020748362039U, + 
4606293848208650998U, 4601473544562720001U, + 13824845581417495809U, 4606293848208650998U, + 4599636300858866724U, 4606674353838411301U, + 13830046390693187109U, 4599636300858866724U, + 4606898891031025132U, 4598136582470364665U, + 13821508619325140473U, 4606898891031025132U, + 4602758354025980442U, 4605929219593405673U, + 13829301256448181481U, 4602758354025980442U, + 4605376811039722786U, 4603585091850767959U, + 13826957128705543767U, 4605376811039722786U, + 4594235767444503503U, 4607092871118901179U, + 13830464907973676987U, 4594235767444503503U, + 4607076652372832968U, 4594673119063280916U, + 13818045155918056724U, 4607076652372832968U, + 4603496309891590679U, 4605442656228245717U, + 13828814693083021525U, 4603496309891590679U, + 4605872393621214213U, 4602853162432841185U, + 13826225199287616993U, 4605872393621214213U, + 4597707695679609371U, 4606925748668145757U, + 13830297785522921565U, 4597707695679609371U, + 4606637115963965612U, 4599844446633109139U, + 13823216483487884947U, 4606637115963965612U, + 4601273700967202825U, 4606341107699334546U, + 13829713144554110354U, 4601273700967202825U, + 4604811873195349477U, 4604264921241055824U, + 13827636958095831632U, 4604811873195349477U, + 4586348876009622851U, 4607174111710118367U, + 13830546148564894175U, 4586348876009622851U, + 4607178180169683960U, 4584498631466405633U, + 13807870668321181441U, 4607178180169683960U, + 4604345904647073908U, 4604736643460027021U, + 13828108680314802829U, 4604345904647073908U, + 4606387137437298591U, 4601072712526242277U, + 13824444749381018085U, 4606387137437298591U, + 4600051662802353687U, 4606598603759044570U, + 13829970640613820378U, 4600051662802353687U, + 4606951288507767453U, 4597277522845151878U, + 13820649559699927686U, 4606951288507767453U, + 4602947266358709886U, 4605814408482919348U, + 13829186445337695156U, 4602947266358709886U, + 4605507406967535927U, 4603406726595779752U, + 13826778763450555560U, 4605507406967535927U, + 4595109641634432498U, 4607059093103722971U, + 
13830431129958498779U, 4595109641634432498U, + 4607107746899444102U, 4593797652641645341U, + 13817169689496421149U, 4607107746899444102U, + 4603673059103075106U, 4605309881318010327U, + 13828681918172786135U, 4603673059103075106U, + 4605984877841711338U, 4602646891659203088U, + 13826018928513978896U, 4605984877841711338U, + 4598369669086960528U, 4606870719641066940U, + 13830242756495842748U, 4598369669086960528U, + 4606710311774494716U, 4599427256825614420U, + 13822799293680390228U, 4606710311774494716U, + 4601672213217083403U, 4606245366082353408U, + 13829617402937129216U, 4601672213217083403U, + 4604959323120302796U, 4604100215502905499U, + 13827472252357681307U, 4604959323120302796U, + 4589524267239410099U, 4607161910007591876U, + 13830533946862367684U, 4589524267239410099U, + 4607153778602162496U, 4590406145430462614U, + 13813778182285238422U, 4607153778602162496U, + 4604016517974851588U, 4605031521104517324U, + 13828403557959293132U, 4604016517974851588U, + 4606195668621671667U, 4601869677011524443U, + 13825241713866300251U, 4606195668621671667U, + 4599217346014614711U, 4606744984357082948U, + 13830117021211858756U, 4599217346014614711U, + 4606841238740778884U, 4598582729657176439U, + 13821954766511952247U, 4606841238740778884U, + 4602454542796181607U, 4606039359984203741U, + 13829411396838979549U, 4602454542796181607U, + 4605241877142478242U, 4603760198400967492U, + 13827132235255743300U, 4605241877142478242U, + 4593046061348462537U, 4607121277474223905U, + 13830493314328999713U, 4593046061348462537U, + 4607040195955932526U, 4595545269419264690U, + 13818917306274040498U, 4607040195955932526U, + 4603316355454250015U, 4605571053506370248U, + 13828943090361146056U, 4603316355454250015U, + 4605755272910869620U, 4603040651631881451U, + 13826412688486657259U, 4605755272910869620U, + 4596846128749438754U, 4606975506703684317U, + 13830347543558460125U, 4596846128749438754U, + 4606558823023444576U, 4600257918160607478U, + 13823629955015383286U, 4606558823023444576U, + 
4600870609507958271U, 4606431930490633905U, + 13829803967345409713U, 4600870609507958271U, + 4604660425598397818U, 4604425958770613225U, + 13827797995625389033U, 4604660425598397818U, + 4580962600092897021U, 4607180892816495009U, + 13830552929671270817U, 4580962600092897021U, + 4607180892816495009U, 4580962600092897021U, + 13804334636947672829U, 4607180892816495009U, + 4604425958770613225U, 4604660425598397818U, + 13828032462453173626U, 4604425958770613225U, + 4606431930490633905U, 4600870609507958271U, + 13824242646362734079U, 4606431930490633905U, + 4600257918160607478U, 4606558823023444576U, + 13829930859878220384U, 4600257918160607478U, + 4606975506703684317U, 4596846128749438754U, + 13820218165604214562U, 4606975506703684317U, + 4603040651631881451U, 4605755272910869620U, + 13829127309765645428U, 4603040651631881451U, + 4605571053506370248U, 4603316355454250015U, + 13826688392309025823U, 4605571053506370248U, + 4595545269419264690U, 4607040195955932526U, + 13830412232810708334U, 4595545269419264690U, + 4607121277474223905U, 4593046061348462537U, + 13816418098203238345U, 4607121277474223905U, + 4603760198400967492U, 4605241877142478242U, + 13828613913997254050U, 4603760198400967492U, + 4606039359984203741U, 4602454542796181607U, + 13825826579650957415U, 4606039359984203741U, + 4598582729657176439U, 4606841238740778884U, + 13830213275595554692U, 4598582729657176439U, + 4606744984357082948U, 4599217346014614711U, + 13822589382869390519U, 4606744984357082948U, + 4601869677011524443U, 4606195668621671667U, + 13829567705476447475U, 4601869677011524443U, + 4605031521104517324U, 4604016517974851588U, + 13827388554829627396U, 4605031521104517324U, + 4590406145430462614U, 4607153778602162496U, + 13830525815456938304U, 4590406145430462614U, + 4607161910007591876U, 4589524267239410099U, + 13812896304094185907U, 4607161910007591876U, + 4604100215502905499U, 4604959323120302796U, + 13828331359975078604U, 4604100215502905499U, + 4606245366082353408U, 4601672213217083403U, + 
13825044250071859211U, 4606245366082353408U, + 4599427256825614420U, 4606710311774494716U, + 13830082348629270524U, 4599427256825614420U, + 4606870719641066940U, 4598369669086960528U, + 13821741705941736336U, 4606870719641066940U, + 4602646891659203088U, 4605984877841711338U, + 13829356914696487146U, 4602646891659203088U, + 4605309881318010327U, 4603673059103075106U, + 13827045095957850914U, 4605309881318010327U, + 4593797652641645341U, 4607107746899444102U, + 13830479783754219910U, 4593797652641645341U, + 4607059093103722971U, 4595109641634432498U, + 13818481678489208306U, 4607059093103722971U, + 4603406726595779752U, 4605507406967535927U, + 13828879443822311735U, 4603406726595779752U, + 4605814408482919348U, 4602947266358709886U, + 13826319303213485694U, 4605814408482919348U, + 4597277522845151878U, 4606951288507767453U, + 13830323325362543261U, 4597277522845151878U, + 4606598603759044570U, 4600051662802353687U, + 13823423699657129495U, 4606598603759044570U, + 4601072712526242277U, 4606387137437298591U, + 13829759174292074399U, 4601072712526242277U, + 4604736643460027021U, 4604345904647073908U, + 13827717941501849716U, 4604736643460027021U, + 4584498631466405633U, 4607178180169683960U, + 13830550217024459768U, 4584498631466405633U, + 4607174111710118367U, 4586348876009622851U, + 13809720912864398659U, 4607174111710118367U, + 4604264921241055824U, 4604811873195349477U, + 13828183910050125285U, 4604264921241055824U, + 4606341107699334546U, 4601273700967202825U, + 13824645737821978633U, 4606341107699334546U, + 4599844446633109139U, 4606637115963965612U, + 13830009152818741420U, 4599844446633109139U, + 4606925748668145757U, 4597707695679609371U, + 13821079732534385179U, 4606925748668145757U, + 4602853162432841185U, 4605872393621214213U, + 13829244430475990021U, 4602853162432841185U, + 4605442656228245717U, 4603496309891590679U, + 13826868346746366487U, 4605442656228245717U, + 4594673119063280916U, 4607076652372832968U, + 13830448689227608776U, 4594673119063280916U, + 
4607092871118901179U, 4594235767444503503U, + 13817607804299279311U, 4607092871118901179U, + 4603585091850767959U, 4605376811039722786U, + 13828748847894498594U, 4603585091850767959U, + 4605929219593405673U, 4602758354025980442U, + 13826130390880756250U, 4605929219593405673U, + 4598136582470364665U, 4606898891031025132U, + 13830270927885800940U, 4598136582470364665U, + 4606674353838411301U, 4599636300858866724U, + 13823008337713642532U, 4606674353838411301U, + 4601473544562720001U, 4606293848208650998U, + 13829665885063426806U, 4601473544562720001U, + 4604886103475043762U, 4604183020748362039U, + 13827555057603137847U, 4604886103475043762U, + 4588115294056142819U, 4607168688050493276U, + 13830540724905269084U, 4588115294056142819U, + 4607144295058764886U, 4591287158938884897U, + 13814659195793660705U, 4607144295058764886U, + 4603931940768740167U, 4605102686554936490U, + 13828474723409712298U, 4603931940768740167U, + 4606144763310860551U, 4602065906208722008U, + 13825437943063497816U, 4606144763310860551U, + 4599006600037663623U, 4606778366364612594U, + 13830150403219388402U, 4599006600037663623U, + 4606810452769876110U, 4598795050632330097U, + 13822167087487105905U, 4606810452769876110U, + 4602260871257280788U, 4606092657816072624U, + 13829464694670848432U, 4602260871257280788U, + 4605172808754305228U, 4603846496621587377U, + 13827218533476363185U, 4605172808754305228U, + 4592167175087283203U, 4607133460805585796U, + 13830505497660361604U, 4592167175087283203U, + 4607019963775302583U, 4595979936813835462U, + 13819351973668611270U, 4607019963775302583U, + 4603225210076562971U, 4605633586259814045U, + 13829005623114589853U, 4603225210076562971U, + 4605694995810664660U, 4603133304188877240U, + 13826505341043653048U, 4605694995810664660U, + 4596413578358834022U, 4606998399608725124U, + 13830370436463500932U, 4596413578358834022U, + 4606517779747998088U, 4600463181646572228U, + 13823835218501348036U, 4606517779747998088U, + 4600667422348321968U, 4606475480113671417U, + 
13829847516968447225U, 4600667422348321968U, + 4604583231088591477U, 4604505071555817232U, + 13827877108410593040U, 4604583231088591477U, + 4573724215515480177U, 4607182249242036882U, + 13830554286096812690U, 4573724215515480177U, + 4607182376410422530U, 4569220649180767418U, + 13792592686035543226U, 4607182376410422530U, + 4604524701268679793U, 4604563781218984604U, + 13827935818073760412U, 4604524701268679793U, + 4606486172460753999U, 4600616459743653188U, + 13823988496598428996U, 4606486172460753999U, + 4600514338912178239U, 4606507322377452870U, + 13829879359232228678U, 4600514338912178239U, + 4607003915349878877U, 4596305267720071930U, + 13819677304574847738U, 4607003915349878877U, + 4603156351203636159U, 4605679749231851918U, + 13829051786086627726U, 4603156351203636159U, + 4605649044311923410U, 4603202304363743346U, + 13826574341218519154U, 4605649044311923410U, + 4596088445927168004U, 4607014697483910382U, + 13830386734338686190U, 4596088445927168004U, + 4607136295912168606U, 4591947271803021404U, + 13815319308657797212U, 4607136295912168606U, + 4603867938232615808U, 4605155376589456981U, + 13828527413444232789U, 4603867938232615808U, + 4606105796280968177U, 4602212250118051877U, + 13825584286972827685U, 4606105796280968177U, + 4598848011564831930U, 4606802552898869248U, + 13830174589753645056U, 4598848011564831930U, + 4606786509620734768U, 4598953786765296928U, + 13822325823620072736U, 4606786509620734768U, + 4602114767134999006U, 4606131849150971908U, + 13829503886005747716U, 4602114767134999006U, + 4605120315324767624U, 4603910660507251362U, + 13827282697362027170U, 4605120315324767624U, + 4591507261658050721U, 4607141713064252300U, + 13830513749919028108U, 4591507261658050721U, + 4607170170974224083U, 4587673791460508439U, + 13811045828315284247U, 4607170170974224083U, + 4604203581176243359U, 4604867640218014515U, + 13828239677072790323U, 4604203581176243359U, + 4606305777984577632U, 4601423692641949331U, + 13824795729496725139U, 4606305777984577632U, + 
4599688422741010356U, 4606665164148251002U, + 13830037201003026810U, 4599688422741010356U, + 4606905728766014348U, 4598029484874872834U, + 13821401521729648642U, 4606905728766014348U, + 4602782121393764535U, 4605915122243179241U, + 13829287159097955049U, 4602782121393764535U, + 4605393374401988274U, 4603562972219549215U, + 13826935009074325023U, 4605393374401988274U, + 4594345179472540681U, 4607088942243446236U, + 13830460979098222044U, 4594345179472540681U, + 4607080832832247697U, 4594563856311064231U, + 13817935893165840039U, 4607080832832247697U, + 4603518581031047189U, 4605426297151190466U, + 13828798334005966274U, 4603518581031047189U, + 4605886709123365959U, 4602829525820289164U, + 13826201562675064972U, 4605886709123365959U, + 4597815040470278984U, 4606919157647773535U, + 13830291194502549343U, 4597815040470278984U, + 4606646545123403481U, 4599792496117920694U, + 13823164532972696502U, 4606646545123403481U, + 4601323770373937522U, 4606329407841126011U, + 13829701444695901819U, 4601323770373937522U, + 4604830524903495634U, 4604244531615310815U, + 13827616568470086623U, 4604830524903495634U, + 4586790578280679046U, 4607172882816799076U, + 13830544919671574884U, 4586790578280679046U, + 4607178985458280057U, 4583614727651146525U, + 13806986764505922333U, 4607178985458280057U, + 4604366005771528720U, 4604717681185626434U, + 13828089718040402242U, 4604366005771528720U, + 4606398451906509788U, 4601022290077223616U, + 13824394326931999424U, 4606398451906509788U, + 4600103317933788342U, 4606588777269136769U, + 13829960814123912577U, 4600103317933788342U, + 4606957467106717424U, 4597169786279785693U, + 13820541823134561501U, 4606957467106717424U, + 4602970680601913687U, 4605799732098147061U, + 13829171768952922869U, 4602970680601913687U, + 4605523422498301790U, 4603384207141321914U, + 13826756243996097722U, 4605523422498301790U, + 4595218635031890910U, 4607054494135176056U, + 13830426530989951864U, 4595218635031890910U, + 4607111255739239816U, 4593688012422887515U, + 
13817060049277663323U, 4607111255739239816U, + 4603694922063032361U, 4605292980606880364U, + 13828665017461656172U, 4603694922063032361U, + 4605998608960791335U, 4602598930031891166U, + 13825970966886666974U, 4605998608960791335U, + 4598423001813699022U, 4606863472012527185U, + 13830235508867302993U, 4598423001813699022U, + 4606719100629313491U, 4599374859150636784U, + 13822746896005412592U, 4606719100629313491U, + 4601721693286060937U, 4606233055365547081U, + 13829605092220322889U, 4601721693286060937U, + 4604977468824438271U, 4604079374282302598U, + 13827451411137078406U, 4604977468824438271U, + 4589744810590291021U, 4607160003989618959U, + 13830532040844394767U, 4589744810590291021U, + 4607155938267770208U, 4590185751760970393U, + 13813557788615746201U, 4607155938267770208U, + 4604037525321326463U, 4605013567986435066U, + 13828385604841210874U, 4604037525321326463U, + 4606208206518262803U, 4601820425647934753U, + 13825192462502710561U, 4606208206518262803U, + 4599269903251194481U, 4606736437002195879U, + 13830108473856971687U, 4599269903251194481U, + 4606848731493011465U, 4598529532600161144U, + 13821901569454936952U, 4606848731493011465U, + 4602502755147763107U, 4606025850160239809U, + 13829397887015015617U, 4602502755147763107U, + 4605258978359093269U, 4603738491917026584U, + 13827110528771802392U, 4605258978359093269U, + 4593265590854265407U, 4607118021058468598U, + 13830490057913244406U, 4593265590854265407U, + 4607045045516813836U, 4595436449949385485U, + 13818808486804161293U, 4607045045516813836U, + 4603339021357904144U, 4605555245917486022U, + 13828927282772261830U, 4603339021357904144U, + 4605770164172969910U, 4603017373458244943U, + 13826389410313020751U, 4605770164172969910U, + 4596954088216812973U, 4606969576261663845U, + 13830341613116439653U, 4596954088216812973U, + 4606568886807728474U, 4600206446098256018U, + 13823578482953031826U, 4606568886807728474U, + 4600921238092511730U, 4606420848538580260U, + 13829792885393356068U, 4600921238092511730U, + 
4604679572075463103U, 4604406033021674239U, + 13827778069876450047U, 4604679572075463103U, + 4581846703643734566U, 4607180341788068727U, + 13830552378642844535U, 4581846703643734566U, + 4607181359080094673U, 4579996072175835083U, + 13803368109030610891U, 4607181359080094673U, + 4604445825685214043U, 4604641218080103285U, + 13828013254934879093U, 4604445825685214043U, + 4606442934727379583U, 4600819913163773071U, + 13824191950018548879U, 4606442934727379583U, + 4600309328230211502U, 4606548680329491866U, + 13829920717184267674U, 4600309328230211502U, + 4606981354314050484U, 4596738097012783531U, + 13820110133867559339U, 4606981354314050484U, + 4603063884010218172U, 4605740310302420207U, + 13829112347157196015U, 4603063884010218172U, + 4605586791482848547U, 4603293641160266722U, + 13826665678015042530U, 4605586791482848547U, + 4595654028864046335U, 4607035262954517034U, + 13830407299809292842U, 4595654028864046335U, + 4607124449686274900U, 4592826452951465409U, + 13816198489806241217U, 4607124449686274900U, + 4603781852316960384U, 4605224709411790590U, + 13828596746266566398U, 4603781852316960384U, + 4606052795787882823U, 4602406247776385022U, + 13825778284631160830U, 4606052795787882823U, + 4598635880488956483U, 4606833664420673202U, + 13830205701275449010U, 4598635880488956483U, + 4606753451050079834U, 4599164736579548843U, + 13822536773434324651U, 4606753451050079834U, + 4601918851211878557U, 4606183055233559255U, + 13829555092088335063U, 4601918851211878557U, + 4605049409688478101U, 4603995455647851249U, + 13827367492502627057U, 4605049409688478101U, + 4590626485056654602U, 4607151534426937478U, + 13830523571281713286U, 4590626485056654602U, + 4607163731439411601U, 4589303678145802340U, + 13812675715000578148U, 4607163731439411601U, + 4604121000955189926U, 4604941113561600762U, + 13828313150416376570U, 4604121000955189926U, + 4606257600839867033U, 4601622657843474729U, + 13824994694698250537U, 4606257600839867033U, + 4599479600326345459U, 4606701442584137310U, + 
13830073479438913118U, 4599479600326345459U, + 4606877885424248132U, 4598316292140394014U, + 13821688328995169822U, 4606877885424248132U, + 4602686793990243041U, 4605971073215153165U, + 13829343110069928973U, 4602686793990243041U, + 4605326714874986465U, 4603651144395358093U, + 13827023181250133901U, 4605326714874986465U, + 4593907249284540294U, 4607104153983298999U, + 13830476190838074807U, 4593907249284540294U, + 4607063608453868552U, 4595000592312171144U, + 13818372629166946952U, 4607063608453868552U, + 4603429196809300824U, 4605491322423429598U, + 13828863359278205406U, 4603429196809300824U, + 4605829012964735987U, 4602923807199184054U, + 13826295844053959862U, 4605829012964735987U, + 4597385183080791534U, 4606945027305114062U, + 13830317064159889870U, 4597385183080791534U, + 4606608350964852124U, 4599999947619525579U, + 13823371984474301387U, 4606608350964852124U, + 4601123065313358619U, 4606375745674388705U, + 13829747782529164513U, 4601123065313358619U, + 4604755543975806820U, 4604325745441780828U, + 13827697782296556636U, 4604755543975806820U, + 4585023436363055487U, 4607177290141793710U, + 13830549326996569518U, 4585023436363055487U, + 4607175255902437396U, 4585907115494236537U, + 13809279152349012345U, 4607175255902437396U, + 4604285253548209224U, 4604793159020491611U, + 13828165195875267419U, 4604285253548209224U, + 4606352730697093817U, 4601223560006786057U, + 13824595596861561865U, 4606352730697093817U, + 4599896339047301634U, 4606627607157935956U, + 13829999644012711764U, 4599896339047301634U, + 4606932257325205256U, 4597600270510262682U, + 13820972307365038490U, 4606932257325205256U, + 4602876755014813164U, 4605858005670328613U, + 13829230042525104421U, 4602876755014813164U, + 4605458946901419122U, 4603473988668005304U, + 13826846025522781112U, 4605458946901419122U, + 4594782329999411347U, 4607072388129742377U, + 13830444424984518185U, 4594782329999411347U, + 4607096716058023245U, 4594126307716900071U, + 13817498344571675879U, 4607096716058023245U, + 
4603607160562208225U, 4605360179893335444U, + 13828732216748111252U, 4603607160562208225U, + 4605943243960030558U, 4602734543519989142U, + 13826106580374764950U, 4605943243960030558U, + 4598209407597805010U, 4606891971185517504U, + 13830264008040293312U, 4598209407597805010U, + 4606683463531482757U, 4599584122834874440U, + 13822956159689650248U, 4606683463531482757U, + 4601523323048804569U, 4606281842017099424U, + 13829653878871875232U, 4601523323048804569U, + 4604904503566677638U, 4604162403772767740U, + 13827534440627543548U, 4604904503566677638U, + 4588556721781247689U, 4607167120476811757U, + 13830539157331587565U, 4588556721781247689U, + 4607146792632922887U, 4591066993883984169U, + 13814439030738759977U, 4607146792632922887U, + 4603953166845776383U, 4605084992581147553U, + 13828457029435923361U, 4603953166845776383U, + 4606157602458368090U, 4602016966272225497U, + 13825389003127001305U, 4606157602458368090U, + 4599059363095165615U, 4606770142132396069U, + 13830142178987171877U, 4599059363095165615U, + 4606818271362779153U, 4598742041476147134U, + 13822114078330922942U, 4606818271362779153U, + 4602309411551204896U, 4606079444829232727U, + 13829451481684008535U, 4602309411551204896U, + 4605190175055178825U, 4603825001630339212U, + 13827197038485115020U, 4605190175055178825U, + 4592387007752762956U, 4607130541380624519U, + 13830502578235400327U, 4592387007752762956U, + 4607025146816593591U, 4595871363584150300U, + 13819243400438926108U, 4607025146816593591U, + 4603248068256948438U, 4605618058006716661U, + 13828990094861492469U, 4603248068256948438U, + 4605710171610479304U, 4603110210506737381U, + 13826482247361513189U, 4605710171610479304U, + 4596521820799644122U, 4606992800820440327U, + 13830364837675216135U, 4596521820799644122U, + 4606528158595189433U, 4600411960456200676U, + 13823783997310976484U, 4606528158595189433U, + 4600718319105833937U, 4606464709641375231U, + 13829836746496151039U, 4600718319105833937U, + 4604602620643553229U, 4604485382263976838U, + 
13827857419118752646U, 4604602620643553229U, + 4576459225186735875U, 4607182037296057423U, + 13830554074150833231U, 4576459225186735875U, + 4607182037296057423U, 4576459225186735875U, + 13799831262041511683U, 4607182037296057423U, + 4604485382263976838U, 4604602620643553229U, + 13827974657498329037U, 4604485382263976838U, + 4606464709641375231U, 4600718319105833937U, + 13824090355960609745U, 4606464709641375231U, + 4600411960456200676U, 4606528158595189433U, + 13829900195449965241U, 4600411960456200676U, + 4606992800820440327U, 4596521820799644122U, + 13819893857654419930U, 4606992800820440327U, + 4603110210506737381U, 4605710171610479304U, + 13829082208465255112U, 4603110210506737381U, + 4605618058006716661U, 4603248068256948438U, + 13826620105111724246U, 4605618058006716661U, + 4595871363584150300U, 4607025146816593591U, + 13830397183671369399U, 4595871363584150300U, + 4607130541380624519U, 4592387007752762956U, + 13815759044607538764U, 4607130541380624519U, + 4603825001630339212U, 4605190175055178825U, + 13828562211909954633U, 4603825001630339212U, + 4606079444829232727U, 4602309411551204896U, + 13825681448405980704U, 4606079444829232727U, + 4598742041476147134U, 4606818271362779153U, + 13830190308217554961U, 4598742041476147134U, + 4606770142132396069U, 4599059363095165615U, + 13822431399949941423U, 4606770142132396069U, + 4602016966272225497U, 4606157602458368090U, + 13829529639313143898U, 4602016966272225497U, + 4605084992581147553U, 4603953166845776383U, + 13827325203700552191U, 4605084992581147553U, + 4591066993883984169U, 4607146792632922887U, + 13830518829487698695U, 4591066993883984169U, + 4607167120476811757U, 4588556721781247689U, + 13811928758636023497U, 4607167120476811757U, + 4604162403772767740U, 4604904503566677638U, + 13828276540421453446U, 4604162403772767740U, + 4606281842017099424U, 4601523323048804569U, + 13824895359903580377U, 4606281842017099424U, + 4599584122834874440U, 4606683463531482757U, + 13830055500386258565U, 4599584122834874440U, + 
4606891971185517504U, 4598209407597805010U, + 13821581444452580818U, 4606891971185517504U, + 4602734543519989142U, 4605943243960030558U, + 13829315280814806366U, 4602734543519989142U, + 4605360179893335444U, 4603607160562208225U, + 13826979197416984033U, 4605360179893335444U, + 4594126307716900071U, 4607096716058023245U, + 13830468752912799053U, 4594126307716900071U, + 4607072388129742377U, 4594782329999411347U, + 13818154366854187155U, 4607072388129742377U, + 4603473988668005304U, 4605458946901419122U, + 13828830983756194930U, 4603473988668005304U, + 4605858005670328613U, 4602876755014813164U, + 13826248791869588972U, 4605858005670328613U, + 4597600270510262682U, 4606932257325205256U, + 13830304294179981064U, 4597600270510262682U, + 4606627607157935956U, 4599896339047301634U, + 13823268375902077442U, 4606627607157935956U, + 4601223560006786057U, 4606352730697093817U, + 13829724767551869625U, 4601223560006786057U, + 4604793159020491611U, 4604285253548209224U, + 13827657290402985032U, 4604793159020491611U, + 4585907115494236537U, 4607175255902437396U, + 13830547292757213204U, 4585907115494236537U, + 4607177290141793710U, 4585023436363055487U, + 13808395473217831295U, 4607177290141793710U, + 4604325745441780828U, 4604755543975806820U, + 13828127580830582628U, 4604325745441780828U, + 4606375745674388705U, 4601123065313358619U, + 13824495102168134427U, 4606375745674388705U, + 4599999947619525579U, 4606608350964852124U, + 13829980387819627932U, 4599999947619525579U, + 4606945027305114062U, 4597385183080791534U, + 13820757219935567342U, 4606945027305114062U, + 4602923807199184054U, 4605829012964735987U, + 13829201049819511795U, 4602923807199184054U, + 4605491322423429598U, 4603429196809300824U, + 13826801233664076632U, 4605491322423429598U, + 4595000592312171144U, 4607063608453868552U, + 13830435645308644360U, 4595000592312171144U, + 4607104153983298999U, 4593907249284540294U, + 13817279286139316102U, 4607104153983298999U, + 4603651144395358093U, 4605326714874986465U, + 
13828698751729762273U, 4603651144395358093U, + 4605971073215153165U, 4602686793990243041U, + 13826058830845018849U, 4605971073215153165U, + 4598316292140394014U, 4606877885424248132U, + 13830249922279023940U, 4598316292140394014U, + 4606701442584137310U, 4599479600326345459U, + 13822851637181121267U, 4606701442584137310U, + 4601622657843474729U, 4606257600839867033U, + 13829629637694642841U, 4601622657843474729U, + 4604941113561600762U, 4604121000955189926U, + 13827493037809965734U, 4604941113561600762U, + 4589303678145802340U, 4607163731439411601U, + 13830535768294187409U, 4589303678145802340U, + 4607151534426937478U, 4590626485056654602U, + 13813998521911430410U, 4607151534426937478U, + 4603995455647851249U, 4605049409688478101U, + 13828421446543253909U, 4603995455647851249U, + 4606183055233559255U, 4601918851211878557U, + 13825290888066654365U, 4606183055233559255U, + 4599164736579548843U, 4606753451050079834U, + 13830125487904855642U, 4599164736579548843U, + 4606833664420673202U, 4598635880488956483U, + 13822007917343732291U, 4606833664420673202U, + 4602406247776385022U, 4606052795787882823U, + 13829424832642658631U, 4602406247776385022U, + 4605224709411790590U, 4603781852316960384U, + 13827153889171736192U, 4605224709411790590U, + 4592826452951465409U, 4607124449686274900U, + 13830496486541050708U, 4592826452951465409U, + 4607035262954517034U, 4595654028864046335U, + 13819026065718822143U, 4607035262954517034U, + 4603293641160266722U, 4605586791482848547U, + 13828958828337624355U, 4603293641160266722U, + 4605740310302420207U, 4603063884010218172U, + 13826435920864993980U, 4605740310302420207U, + 4596738097012783531U, 4606981354314050484U, + 13830353391168826292U, 4596738097012783531U, + 4606548680329491866U, 4600309328230211502U, + 13823681365084987310U, 4606548680329491866U, + 4600819913163773071U, 4606442934727379583U, + 13829814971582155391U, 4600819913163773071U, + 4604641218080103285U, 4604445825685214043U, + 13827817862539989851U, 4604641218080103285U, + 
4579996072175835083U, 4607181359080094673U, + 13830553395934870481U, 4579996072175835083U, + 4607180341788068727U, 4581846703643734566U, + 13805218740498510374U, 4607180341788068727U, + 4604406033021674239U, 4604679572075463103U, + 13828051608930238911U, 4604406033021674239U, + 4606420848538580260U, 4600921238092511730U, + 13824293274947287538U, 4606420848538580260U, + 4600206446098256018U, 4606568886807728474U, + 13829940923662504282U, 4600206446098256018U, + 4606969576261663845U, 4596954088216812973U, + 13820326125071588781U, 4606969576261663845U, + 4603017373458244943U, 4605770164172969910U, + 13829142201027745718U, 4603017373458244943U, + 4605555245917486022U, 4603339021357904144U, + 13826711058212679952U, 4605555245917486022U, + 4595436449949385485U, 4607045045516813836U, + 13830417082371589644U, 4595436449949385485U, + 4607118021058468598U, 4593265590854265407U, + 13816637627709041215U, 4607118021058468598U, + 4603738491917026584U, 4605258978359093269U, + 13828631015213869077U, 4603738491917026584U, + 4606025850160239809U, 4602502755147763107U, + 13825874792002538915U, 4606025850160239809U, + 4598529532600161144U, 4606848731493011465U, + 13830220768347787273U, 4598529532600161144U, + 4606736437002195879U, 4599269903251194481U, + 13822641940105970289U, 4606736437002195879U, + 4601820425647934753U, 4606208206518262803U, + 13829580243373038611U, 4601820425647934753U, + 4605013567986435066U, 4604037525321326463U, + 13827409562176102271U, 4605013567986435066U, + 4590185751760970393U, 4607155938267770208U, + 13830527975122546016U, 4590185751760970393U, + 4607160003989618959U, 4589744810590291021U, + 13813116847445066829U, 4607160003989618959U, + 4604079374282302598U, 4604977468824438271U, + 13828349505679214079U, 4604079374282302598U, + 4606233055365547081U, 4601721693286060937U, + 13825093730140836745U, 4606233055365547081U, + 4599374859150636784U, 4606719100629313491U, + 13830091137484089299U, 4599374859150636784U, + 4606863472012527185U, 4598423001813699022U, + 
13821795038668474830U, 4606863472012527185U, + 4602598930031891166U, 4605998608960791335U, + 13829370645815567143U, 4602598930031891166U, + 4605292980606880364U, 4603694922063032361U, + 13827066958917808169U, 4605292980606880364U, + 4593688012422887515U, 4607111255739239816U, + 13830483292594015624U, 4593688012422887515U, + 4607054494135176056U, 4595218635031890910U, + 13818590671886666718U, 4607054494135176056U, + 4603384207141321914U, 4605523422498301790U, + 13828895459353077598U, 4603384207141321914U, + 4605799732098147061U, 4602970680601913687U, + 13826342717456689495U, 4605799732098147061U, + 4597169786279785693U, 4606957467106717424U, + 13830329503961493232U, 4597169786279785693U, + 4606588777269136769U, 4600103317933788342U, + 13823475354788564150U, 4606588777269136769U, + 4601022290077223616U, 4606398451906509788U, + 13829770488761285596U, 4601022290077223616U, + 4604717681185626434U, 4604366005771528720U, + 13827738042626304528U, 4604717681185626434U, + 4583614727651146525U, 4607178985458280057U, + 13830551022313055865U, 4583614727651146525U, + 4607172882816799076U, 4586790578280679046U, + 13810162615135454854U, 4607172882816799076U, + 4604244531615310815U, 4604830524903495634U, + 13828202561758271442U, 4604244531615310815U, + 4606329407841126011U, 4601323770373937522U, + 13824695807228713330U, 4606329407841126011U, + 4599792496117920694U, 4606646545123403481U, + 13830018581978179289U, 4599792496117920694U, + 4606919157647773535U, 4597815040470278984U, + 13821187077325054792U, 4606919157647773535U, + 4602829525820289164U, 4605886709123365959U, + 13829258745978141767U, 4602829525820289164U, + 4605426297151190466U, 4603518581031047189U, + 13826890617885822997U, 4605426297151190466U, + 4594563856311064231U, 4607080832832247697U, + 13830452869687023505U, 4594563856311064231U, + 4607088942243446236U, 4594345179472540681U, + 13817717216327316489U, 4607088942243446236U, + 4603562972219549215U, 4605393374401988274U, + 13828765411256764082U, 4603562972219549215U, + 
4605915122243179241U, 4602782121393764535U, + 13826154158248540343U, 4605915122243179241U, + 4598029484874872834U, 4606905728766014348U, + 13830277765620790156U, 4598029484874872834U, + 4606665164148251002U, 4599688422741010356U, + 13823060459595786164U, 4606665164148251002U, + 4601423692641949331U, 4606305777984577632U, + 13829677814839353440U, 4601423692641949331U, + 4604867640218014515U, 4604203581176243359U, + 13827575618031019167U, 4604867640218014515U, + 4587673791460508439U, 4607170170974224083U, + 13830542207828999891U, 4587673791460508439U, + 4607141713064252300U, 4591507261658050721U, + 13814879298512826529U, 4607141713064252300U, + 4603910660507251362U, 4605120315324767624U, + 13828492352179543432U, 4603910660507251362U, + 4606131849150971908U, 4602114767134999006U, + 13825486803989774814U, 4606131849150971908U, + 4598953786765296928U, 4606786509620734768U, + 13830158546475510576U, 4598953786765296928U, + 4606802552898869248U, 4598848011564831930U, + 13822220048419607738U, 4606802552898869248U, + 4602212250118051877U, 4606105796280968177U, + 13829477833135743985U, 4602212250118051877U, + 4605155376589456981U, 4603867938232615808U, + 13827239975087391616U, 4605155376589456981U, + 4591947271803021404U, 4607136295912168606U, + 13830508332766944414U, 4591947271803021404U, + 4607014697483910382U, 4596088445927168004U, + 13819460482781943812U, 4607014697483910382U, + 4603202304363743346U, 4605649044311923410U, + 13829021081166699218U, 4603202304363743346U, + 4605679749231851918U, 4603156351203636159U, + 13826528388058411967U, 4605679749231851918U, + 4596305267720071930U, 4607003915349878877U, + 13830375952204654685U, 4596305267720071930U, + 4606507322377452870U, 4600514338912178239U, + 13823886375766954047U, 4606507322377452870U, + 4600616459743653188U, 4606486172460753999U, + 13829858209315529807U, 4600616459743653188U, + 4604563781218984604U, 4604524701268679793U, + 13827896738123455601U, 4604563781218984604U, + 4569220649180767418U, 4607182376410422530U, + 
13830554413265198338U, 4569220649180767418U +}; + +const fpr fpr_p2_tab[] = { + 4611686018427387904U, + 4607182418800017408U, + 4602678819172646912U, + 4598175219545276416U, + 4593671619917905920U, + 4589168020290535424U, + 4584664420663164928U, + 4580160821035794432U, + 4575657221408423936U, + 4571153621781053440U, + 4566650022153682944U +}; diff --git a/crypto_sign/falcon/falcon-1024/clean/fpr.h b/crypto_sign/falcon/falcon-1024/clean/fpr.h new file mode 100644 index 00000000..dd7e15c2 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/fpr.h @@ -0,0 +1,473 @@ +#ifndef PQCLEAN_FALCON1024_CLEAN_FPR_H +#define PQCLEAN_FALCON1024_CLEAN_FPR_H + +/* + * Floating-point operations. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ====================================================================== */ +/* + * Custom floating-point implementation with integer arithmetics. We + * use IEEE-754 "binary64" format, with some simplifications: + * + * - Top bit is s = 1 for negative, 0 for positive. + * + * - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). + * + * - Mantissa m uses the 52 low bits. + * + * Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) + * i.e. the mantissa really is a 53-bit number (less than 2.0, but not + * less than 1.0), but the top bit (equal to 1 by definition) is omitted + * in the encoding. + * + * In IEEE-754, there are some special values: + * + * - If e = 2047, then the value is either an infinite (m = 0) or + * a NaN (m != 0). + * + * - If e = 0, then the value is either a zero (m = 0) or a subnormal, + * aka "denormalized number" (m != 0). + * + * Of these, we only need the zeros. The caller is responsible for not + * providing operands that would lead to infinites, NaNs or subnormals. + * If inputs are such that values go out of range, then indeterminate + * values are returned (it would still be deterministic, but no specific + * value may be relied upon). + * + * At the C level, the three parts are stored in a 64-bit unsigned + * word. + * + * One may note that a property of the IEEE-754 format is that order + * is preserved for positive values: if two positive floating-point + * values x and y are such that x < y, then their respective encodings + * as _signed_ 64-bit integers i64(x) and i64(y) will be such that + * i64(x) < i64(y). For negative values, order is reversed: if x < 0, + * y < 0, and x < y, then i64(x) > i64(y). 
+ * + * IMPORTANT ASSUMPTIONS: + * ====================== + * + * For proper computations, and constant-time behaviour, we assume the + * following: + * + * - 32x32->64 multiplication (unsigned) has an execution time that + * is independent of its operands. This is true of most modern + * x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ + * and M3 (in the M0 and M0+, this is done in software, so it depends + * on that routine), and the PowerPC cores from the G3/G4 lines. + * For more info, see: https://www.bearssl.org/ctmul.html + * + * - Left-shifts and right-shifts of 32-bit values have an execution + * time which does not depend on the shifted value nor on the + * shift count. A historical exception is the Pentium IV, but most + * modern CPUs have barrel shifters. Some small microcontrollers + * might have varying-time shifts (not the ARM Cortex M*, though). + * + * - Right-shift of a signed negative value performs a sign extension. + * As per the C standard, this operation returns an + * implementation-defined result (this is NOT an "undefined + * behaviour"). On most/all systems, an arithmetic shift is + * performed, because this is what makes most sense. + */ + +/* + * Normally we should declare the 'fpr' type to be a struct or union + * around the internal 64-bit value; however, we want to use the + * direct 64-bit integer type to enable a lighter call convention on + * ARM platforms. This means that direct (invalid) use of operators + * such as '*' or '+' will not be caught by the compiler. We rely on + * the "normal" (non-emulated) code to detect such instances. + */ +typedef uint64_t fpr; + +/* + * For computations, we split values into an integral mantissa in the + * 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is + * "sticky" (it is set to 1 if any of the bits below it is 1); when + * re-encoding, the low two bits are dropped, but may induce an + * increment in the value for proper rounding. 
+ */ + +/* + * Right-shift a 64-bit unsigned value by a possibly secret shift count. + * We assumed that the underlying architecture had a barrel shifter for + * 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will + * typically invoke a software routine that is not necessarily + * constant-time; hence the function below. + * + * Shift count n MUST be in the 0..63 range. + */ +static inline uint64_t +fpr_ursh(uint64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5); + return x >> (n & 31); +} + +/* + * Right-shift a 64-bit signed value by a possibly secret shift count + * (see fpr_ursh() for the rationale). + * + * Shift count n MUST be in the 0..63 range. + */ +static inline int64_t +fpr_irsh(int64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5); + return x >> (n & 31); +} + +/* + * Left-shift a 64-bit unsigned value by a possibly secret shift count + * (see fpr_ursh() for the rationale). + * + * Shift count n MUST be in the 0..63 range. + */ +static inline uint64_t +fpr_ulsh(uint64_t x, int n) { + x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5); + return x << (n & 31); +} + +/* + * Expectations: + * s = 0 or 1 + * exponent e is "arbitrary" and unbiased + * 2^54 <= m < 2^55 + * Numerical value is (-1)^2 * m * 2^e + * + * Exponents which are too low lead to value zero. If the exponent is + * too large, the returned value is indeterminate. + * + * If m = 0, then a zero is returned (using the provided sign). + * If e < -1076, then a zero is returned (regardless of the value of m). + * If e >= -1076 and e != 0, m must be within the expected range + * (2^54 to 2^55-1). + */ +static inline fpr +FPR(int s, int e, uint64_t m) { + fpr x; + uint32_t t; + unsigned f; + + /* + * If e >= -1076, then the value is "normal"; otherwise, it + * should be a subnormal, which we clamp down to zero. + */ + e += 1076; + t = (uint32_t)e >> 31; + m &= (uint64_t)t - 1; + + /* + * If m = 0 then we want a zero; make e = 0 too, but conserve + * the sign. 
+ */ + t = (uint32_t)(m >> 54); + e &= -(int)t; + + /* + * The 52 mantissa bits come from m. Value m has its top bit set + * (unless it is a zero); we leave it "as is": the top bit will + * increment the exponent by 1, except when m = 0, which is + * exactly what we want. + */ + x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); + + /* + * Rounding: if the low three bits of m are 011, 110 or 111, + * then the value should be incremented to get the next + * representable value. This implements the usual + * round-to-nearest rule (with preference to even values in case + * of a tie). Note that the increment may make a carry spill + * into the exponent field, which is again exactly what we want + * in that case. + */ + f = (unsigned)m & 7U; + x += (0xC8U >> f) & 1; + return x; +} + +#define fpr_scaled PQCLEAN_FALCON1024_CLEAN_fpr_scaled +fpr fpr_scaled(int64_t i, int sc); + +static inline fpr +fpr_of(int64_t i) { + return fpr_scaled(i, 0); +} + +static const fpr fpr_q = 4667981563525332992; +static const fpr fpr_inverse_of_q = 4545632735260551042; +static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306; +static const fpr fpr_inv_sigma = 4573359825155195350; +static const fpr fpr_sigma_min_9 = 4608495221497168882; +static const fpr fpr_sigma_min_10 = 4608586345619182117; +static const fpr fpr_log2 = 4604418534313441775; +static const fpr fpr_inv_log2 = 4609176140021203710; +static const fpr fpr_bnorm_max = 4670353323383631276; +static const fpr fpr_zero = 0; +static const fpr fpr_one = 4607182418800017408; +static const fpr fpr_two = 4611686018427387904; +static const fpr fpr_onehalf = 4602678819172646912; +static const fpr fpr_invsqrt2 = 4604544271217802189; +static const fpr fpr_invsqrt8 = 4600040671590431693; +static const fpr fpr_ptwo31 = 4746794007248502784; +static const fpr fpr_ptwo31m1 = 4746794007244308480; +static const fpr fpr_mtwo31m1 = 13970166044099084288U; +static const fpr fpr_ptwo63m1 = 4890909195324358656; +static const fpr 
fpr_mtwo63m1 = 14114281232179134464U; +static const fpr fpr_ptwo63 = 4890909195324358656; + +static inline int64_t +fpr_rint(fpr x) { + uint64_t m, d; + int e; + uint32_t s, dd, f; + + /* + * We assume that the value fits in -(2^63-1)..+(2^63-1). We can + * thus extract the mantissa as a 63-bit integer, then right-shift + * it as needed. + */ + m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + e = 1085 - ((int)(x >> 52) & 0x7FF); + + /* + * If a shift of more than 63 bits is needed, then simply set m + * to zero. This also covers the case of an input operand equal + * to zero. + */ + m &= -(uint64_t)((uint32_t)(e - 64) >> 31); + e &= 63; + + /* + * Right-shift m as needed. Shift count is e. Proper rounding + * mandates that: + * - If the highest dropped bit is zero, then round low. + * - If the highest dropped bit is one, and at least one of the + * other dropped bits is one, then round up. + * - If the highest dropped bit is one, and all other dropped + * bits are zero, then round up if the lowest kept bit is 1, + * or low otherwise (i.e. ties are broken by "rounding to even"). + * + * We thus first extract a word consisting of all the dropped bit + * AND the lowest kept bit; then we shrink it down to three bits, + * the lowest being "sticky". + */ + d = fpr_ulsh(m, 63 - e); + dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); + f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); + m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); + + /* + * Apply the sign bit. + */ + s = (uint32_t)(x >> 63); + return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; +} + +static inline int64_t +fpr_floor(fpr x) { + uint64_t t; + int64_t xi; + int e, cc; + + /* + * We extract the integer as a _signed_ 64-bit integer with + * a scaling factor. Since we assume that the value fits + * in the -(2^63-1)..+(2^63-1) range, we can left-shift the + * absolute value to make it in the 2^62..2^63-1 range: we + * will only need a right-shift afterwards. 
+ */ + e = (int)(x >> 52) & 0x7FF; + t = x >> 63; + xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) + & (((uint64_t)1 << 63) - 1)); + xi = (xi ^ -(int64_t)t) + (int64_t)t; + cc = 1085 - e; + + /* + * We perform an arithmetic right-shift on the value. This + * applies floor() semantics on both positive and negative values + * (rounding toward minus infinity). + */ + xi = fpr_irsh(xi, cc & 63); + + /* + * If the true shift count was 64 or more, then we should instead + * replace xi with 0 (if nonnegative) or -1 (if negative). Edge + * case: -0 will be floored to -1, not 0 (whether this is correct + * is debatable; in any case, the other functions normalize zero + * to +0). + * + * For an input of zero, the non-shifted xi was incorrect (we used + * a top implicit bit of value 1, not 0), but this does not matter + * since this operation will clamp it down. + */ + xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); + return xi; +} + +static inline int64_t +fpr_trunc(fpr x) { + uint64_t t, xu; + int e, cc; + + /* + * Extract the absolute value. Since we assume that the value + * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift + * the absolute value into the 2^62..2^63-1 range, and then + * do a right shift afterwards. + */ + e = (int)(x >> 52) & 0x7FF; + xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + cc = 1085 - e; + xu = fpr_ursh(xu, cc & 63); + + /* + * If the exponent is too low (cc > 63), then the shift was wrong + * and we must clamp the value to 0. This also covers the case + * of an input equal to zero. + */ + xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); + + /* + * Apply back the sign, if the source value is negative. 
+ */ + t = x >> 63; + xu = (xu ^ -t) + t; + return *(int64_t *)&xu; +} + +#define fpr_add PQCLEAN_FALCON1024_CLEAN_fpr_add +fpr fpr_add(fpr x, fpr y); + +static inline fpr +fpr_sub(fpr x, fpr y) { + y ^= (uint64_t)1 << 63; + return fpr_add(x, y); +} + +static inline fpr +fpr_neg(fpr x) { + x ^= (uint64_t)1 << 63; + return x; +} + +static inline fpr +fpr_half(fpr x) { + /* + * To divide a value by 2, we just have to subtract 1 from its + * exponent, but we have to take care of zero. + */ + uint32_t t; + + x -= (uint64_t)1 << 52; + t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; + x &= (uint64_t)t - 1; + return x; +} + +static inline fpr +fpr_double(fpr x) { + /* + * To double a value, we just increment by one the exponent. We + * don't care about infinites or NaNs; however, 0 is a + * special case. + */ + x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; + return x; +} + +#define fpr_mul PQCLEAN_FALCON1024_CLEAN_fpr_mul +fpr fpr_mul(fpr x, fpr y); + +static inline fpr +fpr_sqr(fpr x) { + return fpr_mul(x, x); +} + +#define fpr_div PQCLEAN_FALCON1024_CLEAN_fpr_div +fpr fpr_div(fpr x, fpr y); + +static inline fpr +fpr_inv(fpr x) { + return fpr_div(4607182418800017408u, x); +} + +#define fpr_sqrt PQCLEAN_FALCON1024_CLEAN_fpr_sqrt +fpr fpr_sqrt(fpr x); + +static inline int +fpr_lt(fpr x, fpr y) { + /* + * If both x and y are positive, then a signed comparison yields + * the proper result: + * - For positive values, the order is preserved. + * - The sign bit is at the same place as in integers, so + * sign is preserved. + * Moreover, we can compute [x < y] as sgn(x-y) and the computation + * of x-y will not overflow. + * + * If the signs differ, then sgn(x) gives the proper result. + * + * If both x and y are negative, then the order is reversed. + * Hence [x < y] = sgn(y-x). We must compute this separately from + * sgn(x-y); simply inverting sgn(x-y) would not handle the edge + * case x = y properly. 
+ */ + int cc0, cc1; + int64_t sx; + int64_t sy; + + sx = *(int64_t *)&x; + sy = *(int64_t *)&y; + sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ + + cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ + cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ + + return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); +} + +/* + * Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 + * bits or so. + */ +#define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63 +uint64_t fpr_expm_p63(fpr x, fpr ccs); + +#define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab +extern const fpr fpr_gm_tab[]; + +#define fpr_p2_tab PQCLEAN_FALCON1024_CLEAN_fpr_p2_tab +extern const fpr fpr_p2_tab[]; + +/* ====================================================================== */ +#endif diff --git a/crypto_sign/falcon/falcon-1024/clean/inner.h b/crypto_sign/falcon/falcon-1024/clean/inner.h new file mode 100644 index 00000000..5b0477ac --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/inner.h @@ -0,0 +1,834 @@ +#ifndef PQCLEAN_FALCON1024_CLEAN_INNER_H +#define PQCLEAN_FALCON1024_CLEAN_INNER_H + + +/* + * Internal functions for Falcon. This is not the API intended to be + * used by applications; instead, this internal API provides all the + * primitives on which wrappers build to provide external APIs. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +/* + * IMPORTANT API RULES + * ------------------- + * + * This API has some non-trivial usage rules: + * + * + * - All public functions (i.e. the non-static ones) must be referenced + * with the PQCLEAN_FALCON1024_CLEAN_ macro (e.g. PQCLEAN_FALCON1024_CLEAN_verify_raw for the verify_raw() + * function). That macro adds a prefix to the name, which is + * configurable with the FALCON_PREFIX macro. This allows compiling + * the code into a specific "namespace" and potentially including + * several versions of this code into a single application (e.g. 
to + * have both an AVX2 and a non-AVX2 variant and select the one to use at + * runtime based on availability of AVX2 opcodes). + * + * - Functions that need temporary buffers expect them as a final + * tmp[] array of type uint8_t*, with a size which is documented for + * each function. However, most have some alignment requirements, + * because they will use the array to store 16-bit, 32-bit or 64-bit + * values (e.g. uint64_t or double). The caller must ensure proper + * alignment. What happens on unaligned access depends on the + * underlying architecture, ranging from a slight time penalty + * to immediate termination of the process. + * + * - Some functions rely on specific rounding rules and precision for + * floating-point numbers. On some systems (in particular 32-bit x86 + * with the 387 FPU), this requires setting a hardware control + * word. The caller MUST use set_fpu_cw() to ensure proper precision: + * + * oldcw = set_fpu_cw(2); + * PQCLEAN_FALCON1024_CLEAN_sign_dyn(...); + * set_fpu_cw(oldcw); + * + * On systems where the native floating-point precision is already + * proper, or integer-based emulation is used, the set_fpu_cw() + * function does nothing, so it can be called systematically. + */ +#include "fips202.h" +#include "fpr.h" +#include +#include +#include + + + + + +/* + * Some computations with floating-point elements, in particular + * rounding to the nearest integer, rely on operations using _exactly_ + * the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit + * x86, the 387 FPU may be used (depending on the target OS) and, in + * that case, may use more precision bits (i.e. 64 bits, for an 80-bit + * total type length); to prevent miscomputations, we define an explicit + * function that modifies the precision in the FPU control word. + * + * set_fpu_cw() sets the precision to the provided value, and returns + * the previously set precision; callers are supposed to restore the + * previous precision on exit. 
The correct (52-bit) precision is + * configured with the value "2". On unsupported compilers, or on + * targets other than 32-bit x86, or when the native 'double' type is + * not used, set_fpu_cw() does nothing at all: the version below just returns x. + */ +static inline unsigned +set_fpu_cw(unsigned x) { + return x; +} + + + + +/* ==================================================================== */ +/* + * SHAKE256 implementation (shake.c). + * + * The inner_shake256_*() names below are thin macro aliases for the fips202.h + * incremental API of PQClean (extract passes its arguments in a different order). + */ + + + +#define inner_shake256_context shake256incctx +#define inner_shake256_init(sc) shake256_inc_init(sc) +#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) +#define inner_shake256_flip(sc) shake256_inc_finalize(sc) +#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) +#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc) + + +/* ==================================================================== */ +/* + * Encoding/decoding functions (codec.c). + * + * Encoding functions take as parameters an output buffer (out) with + * a given maximum length (max_out_len); returned value is the actual + * number of bytes which have been written. If the output buffer is + * not large enough, then 0 is returned (some bytes may have been + * written to the buffer). If 'out' is NULL, then 'max_out_len' is + * ignored; instead, the function computes and returns the actual + * required output length (in bytes). + * + * Decoding functions take as parameters an input buffer (in) with + * its maximum length (max_in_len); returned value is the actual number + * of bytes that have been read from the buffer. If the provided length + * is too short, then 0 is returned. + * + * Values to encode or decode are vectors of integers, with N = 2^logn + * elements. + * + * Three encoding formats are defined: + * + * - modq: sequence of values modulo 12289, each encoded over exactly + * 14 bits.
The encoder and decoder verify that integers are within + * the valid range (0..12288). Values are arrays of uint16. + * + * - trim: sequence of signed integers, a specified number of bits + * each. The number of bits is provided as parameter and includes + * the sign bit. Each integer x must be such that |x| < 2^(bits-1) + * (which means that the -2^(bits-1) value is forbidden); encode and + * decode functions check that property. Values are arrays of + * int16_t or int8_t, corresponding to names 'trim_i16' and + * 'trim_i8', respectively. + * + * - comp: variable-length encoding for signed integers; each integer + * uses a minimum of 9 bits, possibly more. This is normally used + * only for signatures. + * + */ + +size_t PQCLEAN_FALCON1024_CLEAN_modq_encode(void *out, size_t max_out_len, + const uint16_t *x, unsigned logn); +size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON1024_CLEAN_comp_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn); + +size_t PQCLEAN_FALCON1024_CLEAN_modq_decode(uint16_t *x, unsigned logn, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON1024_CLEAN_comp_decode(int16_t *x, unsigned logn, + const void *in, size_t max_in_len); + +/* + * Number of bits for key elements, indexed by logn (1 to 10). This + * is at most 8 bits for all degrees, but some degrees may have shorter + * elements. 
+ */ +extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[]; +extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[]; + +/* + * Maximum size, in bits, of elements in a signature, indexed by logn + * (1 to 10). The size includes the sign bit. + */ +extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[]; + +/* ==================================================================== */ +/* + * Support functions used for both signature generation and signature + * verification (common.c). + */ + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. This is the non-constant-time version, which may leak enough + * information to serve as a stop condition on a brute force attack on + * the hashed message (provided that the nonce value is known). + */ +void PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(inner_shake256_context *sc, + uint16_t *x, unsigned logn); + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. The temporary buffer (tmp) must have room for 2*2^logn bytes. + * This function is constant-time but is typically more expensive than + * PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(). + * + * tmp[] must have 16-bit alignment. + */ +void PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. This compares the appropriate norm of the + * vector with the acceptance bound. Returned value is 1 on success + * (vector is short enough to be acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON1024_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. Instead of the first half s1, this + * function receives the "saturated squared norm" of s1, i.e. 
the + * sum of the squares of the coordinates of s1 (saturated at 2^32-1 + * if the sum exceeds 2^31-1). + * + * Returned value is 1 on success (vector is short enough to be + * acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON1024_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); + +/* ==================================================================== */ +/* + * Signature verification functions (vrfy.c). + */ + +/* + * Convert a public key to NTT + Montgomery format. Conversion is done + * in place. + */ +void PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); + +/* + * Internal signature verification code: + * c0[] contains the hashed nonce+message + * s2[] is the decoded signature + * h[] contains the public key, in NTT + Montgomery format + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp); + +/* + * Compute the public key h[], given the private key elements f[] and + * g[]. This computes h = g/f mod phi mod q, where phi is the polynomial + * modulus. This function returns 1 on success, 0 on error (an error is + * reported if f is not invertible mod phi mod q). + * + * The tmp[] array must have room for at least 2*2^logn elements. + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); + +/* + * Recompute the fourth private key element. Private key consists in + * four polynomials with small coefficients f, g, F and G, which are + * such that fG - gF = q mod phi; furthermore, f is invertible modulo + * phi and modulo q. This function recomputes G from f, g and F. + * + * The tmp[] array must have room for at least 4*2^logn bytes. 
+ * + * Returned value is 1 on success, 0 on error (f not invertible). + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp); + +/* + * Test whether a given polynomial is invertible modulo phi and q. + * Polynomial coefficients are small integers. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_CLEAN_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp); + +/* + * Count the number of elements of value zero in the NTT representation + * of the given polynomial: this is the number of primitive 2n-th roots + * of unity (modulo q = 12289) that are roots of the provided polynomial + * (taken modulo q). + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp); + +/* + * Internal signature verification with public key recovery: + * h[] receives the public key (NOT in NTT/Montgomery format) + * c0[] contains the hashed nonce+message + * s1[] is the first signature half + * s2[] is the second signature half + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. Success is returned if + * the signature is a short enough vector; in that case, the public + * key has been written to h[]. However, the caller must still + * verify that h[] is the correct value (e.g. with regard to a known + * hash of the public key). + * + * h[] may not overlap with any of the other arrays. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Implementation of floating-point real numbers (fpr.h, fpr.c).
+ */ + +/* + * Real numbers are implemented by an extra header file, included above. + * This is meant to support pluggable implementations. The default + * implementation relies on the C type 'double'. + * + * The included file must define the following types, functions and + * constants: + * + * fpr + * type for a real number + * + * fpr fpr_of(int64_t i) + * cast an integer into a real number; source must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_scaled(int64_t i, int sc) + * compute i*2^sc as a real number; source 'i' must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_ldexp(fpr x, int e) + * compute x*2^e + * + * int64_t fpr_rint(fpr x) + * round x to the nearest integer; x must be in the -(2^63-1) + * to +(2^63-1) range + * + * int64_t fpr_trunc(fpr x) + * round to an integer; this rounds towards zero; value must + * be in the -(2^63-1) to +(2^63-1) range + * + * fpr fpr_add(fpr x, fpr y) + * compute x + y + * + * fpr fpr_sub(fpr x, fpr y) + * compute x - y + * + * fpr fpr_neg(fpr x) + * compute -x + * + * fpr fpr_half(fpr x) + * compute x/2 + * + * fpr fpr_double(fpr x) + * compute x*2 + * + * fpr fpr_mul(fpr x, fpr y) + * compute x * y + * + * fpr fpr_sqr(fpr x) + * compute x * x + * + * fpr fpr_inv(fpr x) + * compute 1/x + * + * fpr fpr_div(fpr x, fpr y) + * compute x/y + * + * fpr fpr_sqrt(fpr x) + * compute the square root of x + * + * int fpr_lt(fpr x, fpr y) + * return 1 if x < y, 0 otherwise + * + * uint64_t fpr_expm_p63(fpr x) + * return exp(-x), assuming that 0 <= x < log(2). Returned value + * is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), + * rounded to the nearest integer). Computation should have a + * precision of at least 45 bits.
+ * + * const fpr fpr_gm_tab[] + * array of constants for FFT / iFFT + * + * const fpr fpr_p2_tab[] + * precomputed powers of 2 (by index, 0 to 10) + * + * Constants of type 'fpr': + * + * fpr fpr_q 12289 + * fpr fpr_inverse_of_q 1/12289 + * fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) + * fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) + * fpr fpr_sigma_min_9 1.291500756233514568549480827642 + * fpr fpr_sigma_min_10 1.311734375905083682667395805765 + * fpr fpr_log2 log(2) + * fpr fpr_inv_log2 1/log(2) + * fpr fpr_bnorm_max 16822.4121 + * fpr fpr_zero 0 + * fpr fpr_one 1 + * fpr fpr_two 2 + * fpr fpr_onehalf 0.5 + * fpr fpr_ptwo31 2^31 + * fpr fpr_ptwo31m1 2^31-1 + * fpr fpr_mtwo31m1 -(2^31-1) + * fpr fpr_ptwo63m1 2^63-1 + * fpr fpr_mtwo63m1 -(2^63-1) + * fpr fpr_ptwo63 2^63 + */ + +/* ==================================================================== */ +/* + * RNG (rng.c). + * + * A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 + * context (flipped) and is used for bulk pseudorandom generation. + * A system-dependent seed generator is also provided. + */ + +/* + * Obtain a random seed from the system RNG. + * + * Returned value is 1 on success, 0 on error. + */ +int PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t seed_len); + +/* + * Structure for a PRNG. This includes a large buffer ('buf') so that values + * get generated in advance; 'ptr' is the offset in buf.d of the next unread byte. + * The 'state' keeps the current PRNG algorithm state (contents depend on the selected algorithm). + * + * The unions with 'dummy_u64' are there to ensure proper alignment for + * 64-bit direct access. + */ +typedef struct { + union { + uint8_t d[512]; /* MUST be 512, exactly */ + uint64_t dummy_u64; + } buf; + size_t ptr; + union { + uint8_t d[256]; + uint64_t dummy_u64; + } state; + int type; +} prng; + +/* + * Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 + * context (in "flipped" state) to obtain its initial state.
+ */ +void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src); + +/* + * Refill the PRNG buffer. This is normally invoked automatically, and + * is declared here only so that prng_get_u64() and prng_get_u8() may be inlined. + */ +void PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p); + +/* + * Get some bytes from a PRNG. + */ +void PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); + +/* + * Get a 64-bit random value from a PRNG (eight buffered bytes, least significant byte first). + */ +static inline uint64_t +prng_get_u64(prng *p) { + size_t u; + + /* + * If 9 bytes or fewer remain in the buffer, we refill it. + * This means that we may drop the last few bytes, but this allows + * for faster extraction code. Also, it means that we never leave + * an empty buffer. + */ + u = p->ptr; + if (u >= (sizeof p->buf.d) - 9) { + PQCLEAN_FALCON1024_CLEAN_prng_refill(p); + u = 0; + } + p->ptr = u + 8; + + /* + * Decode the eight bytes in little-endian order, one byte at a time; + * this does not depend on host endianness or on alignment. + */ + return (uint64_t)p->buf.d[u + 0] + | ((uint64_t)p->buf.d[u + 1] << 8) + | ((uint64_t)p->buf.d[u + 2] << 16) + | ((uint64_t)p->buf.d[u + 3] << 24) + | ((uint64_t)p->buf.d[u + 4] << 32) + | ((uint64_t)p->buf.d[u + 5] << 40) + | ((uint64_t)p->buf.d[u + 6] << 48) + | ((uint64_t)p->buf.d[u + 7] << 56); +} + +/* + * Get an 8-bit random value from a PRNG. The buffer is refilled as soon as its last byte is consumed (presumably prng_refill() resets p->ptr -- confirm in rng.c). + */ +static inline unsigned +prng_get_u8(prng *p) { + unsigned v; + + v = p->buf.d[p->ptr ++]; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON1024_CLEAN_prng_refill(p); + } + return v; +} + +/* ==================================================================== */ +/* + * FFT (falcon-fft.c). + * + * A real polynomial is represented as an array of N 'fpr' elements. + * The FFT representation of a real polynomial contains N/2 complex + * elements; each is stored as two real numbers, for the real and + * imaginary parts, respectively. See falcon-fft.c for details on the + * internal representation.
+ */ + +/* + * Compute FFT in-place: the source array should contain a real + * polynomial (N coefficients); its storage area is reused to store + * the FFT representation of that polynomial (N/2 complex numbers). + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn); + +/* + * Compute the inverse FFT in-place: the source array should contain the + * FFT representation of a real polynomial (N/2 elements); the resulting + * real polynomial (N coefficients of type 'fpr') is written over the + * array. + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn); + +/* + * Add polynomial b to polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_add(fpr *a, const fpr *b, unsigned logn); + +/* + * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_sub(fpr *a, const fpr *b, unsigned logn); + +/* + * Negate polynomial a. This function works in both normal and FFT + * representations. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn); + +/* + * Compute adjoint of polynomial a. This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial a with polynomial b. a and b MUST NOT overlap. + * This function works only in FFT representation. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT + * overlap. This function works only in FFT representation. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial with its own adjoint. 
This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial with a real constant. This function works in both + * normal and FFT representations. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); + +/* + * Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_div_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) + * (also in FFT representation). Since the result is auto-adjoint, all its + * coordinates in FFT representation are real; as such, only the first N/2 + * values of d[] are filled (the imaginary parts are skipped). + * + * Array d MUST NOT overlap with either a or b. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn); + +/* + * Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) + * (also in FFT representation). Destination d MUST NOT overlap with + * any of the source arrays. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn); + +/* + * Multiply polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Divide polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. 
+ */ +void PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. On input, g00, g01 and g11 are provided (where the + * matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 + * and d11 values are written in g00, g01 and g11, respectively + * (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). + * (In fact, d00 = g00, so the g00 operand is left unmodified.) + */ +void PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(const fpr *g00, + fpr *g01, fpr *g11, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. This is identical to poly_LDL_fft() except that + * g00, g01 and g11 are unmodified; the outputs d11 and l10 are written + * in two other separate buffers provided as extra parameters. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn); + +/* + * Apply "split" operation on a polynomial in FFT representation: + * f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 + * (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_split_fft(fpr *f0, fpr *f1, + const fpr *f, unsigned logn); + +/* + * Apply "merge" operation on two polynomials in FFT representation: + * given f0 and f1, polynomials modulo X^(N/2)+1, this function computes + * f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. + * f MUST NOT overlap with either f0 or f1. + */ +void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *f, + const fpr *f0, const fpr *f1, unsigned logn); + +/* ==================================================================== */ +/* + * Key pair generation. + */ + +/* + * Required sizes of the temporary buffer (in bytes). + * + * This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1 + * or 2) where it is slightly greater.
+ */ +#define FALCON_KEYGEN_TEMP_1 136 +#define FALCON_KEYGEN_TEMP_2 272 +#define FALCON_KEYGEN_TEMP_3 224 +#define FALCON_KEYGEN_TEMP_4 448 +#define FALCON_KEYGEN_TEMP_5 896 +#define FALCON_KEYGEN_TEMP_6 1792 +#define FALCON_KEYGEN_TEMP_7 3584 +#define FALCON_KEYGEN_TEMP_8 7168 +#define FALCON_KEYGEN_TEMP_9 14336 +#define FALCON_KEYGEN_TEMP_10 28672 + +/* + * Generate a new key pair. Randomness is extracted from the provided + * SHAKE256 context, which must have already been seeded and flipped. + * The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* + * macros) and be aligned for the uint32_t, uint64_t and fpr types. + * + * The private key elements are written in f, g, F and G, and the + * public key is written in h. Either or both of G and h may be NULL, + * in which case the corresponding element is not returned (they can + * be recomputed from f, g and F). + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_CLEAN_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Signature generation. + */ + +/* + * Expand a private key into the B0 matrix in FFT representation and + * the LDL tree. All the values are written in 'expanded_key', for + * a total of (8*logn+40)*2^logn bytes. + * + * The tmp[] array must have room for at least 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key, + const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, + unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. 
This function uses an + * expanded key (as generated by PQCLEAN_FALCON1024_CLEAN_expand_privkey()). + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. This function uses a raw + * key and dynamically recomputes the B0 matrix and LDL tree; this + * saves RAM since there is no need for an expanded key, but + * increases the signature cost. + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 72*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Internal sampler engine. Exported for tests. + * + * sampler_context wraps around a source of random numbers (PRNG) and + * the sigma_min value (nominally dependent on the degree). + * + * sampler() takes as parameters: + * ctx pointer to the sampler_context structure + * mu center for the distribution + * isigma inverse of the distribution standard deviation + * It returns an integer sampled along the Gaussian distribution centered + * on mu and of standard deviation sigma = 1/isigma.
+ * + * gaussian0_sampler() takes as parameter a pointer to a PRNG, and + * returns an integer sampled along a half-Gaussian with standard + * deviation sigma0 = 1.8205 (center is 0, returned value is + * nonnegative). + */ + +typedef struct { + prng p; + fpr sigma_min; +} sampler_context; + +int PQCLEAN_FALCON1024_CLEAN_sampler(void *ctx, fpr mu, fpr isigma); + +int PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(prng *p); + +/* ==================================================================== */ + +#endif diff --git a/crypto_sign/falcon/falcon-1024/clean/keygen.c b/crypto_sign/falcon/falcon-1024/clean/keygen.c new file mode 100644 index 00000000..2d47412d --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/keygen.c @@ -0,0 +1,4231 @@ +#include "inner.h" + +/* + * Falcon key pair generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +#define MKN(logn) ((size_t)1 << (logn)) + +/* ==================================================================== */ +/* + * Modular arithmetics. + * + * We implement a few functions for computing modulo a small integer p. + * + * All functions require that 2^30 < p < 2^31. Moreover, operands must + * be in the 0..p-1 range. + * + * Modular addition and subtraction work for all such p. + * + * Montgomery multiplication requires that p is odd, and must be provided + * with an additional value p0i = -1/p mod 2^31. See below for some basics + * on Montgomery multiplication. + * + * Division computes an inverse modulo p by an exponentiation (with + * exponent p-2): this works only if p is prime. Multiplication + * requirements also apply, i.e. p must be odd and p0i must be provided. + * + * The NTT and inverse NTT need all of the above, and also that + * p = 1 mod 2048. + * + * ----------------------------------------------------------------------- + * + * We use Montgomery representation with 31-bit values: + * + * Let R = 2^31 mod p. When 2^30 < p < 2^31, R = 2^31 - p. + * Montgomery representation of an integer x modulo p is x*R mod p. + * + * Montgomery multiplication computes (x*y)/R mod p for + * operands x and y. 
Therefore: + * + * - if operands are x*R and y*R (Montgomery representations of x and + * y), then Montgomery multiplication computes (x*R*y*R)/R = (x*y)*R + * mod p, which is the Montgomery representation of the product x*y; + * + * - if operands are x*R and y (or x and y*R), then Montgomery + * multiplication returns x*y mod p: mixed-representation + * multiplications yield results in normal representation. + * + * To convert to Montgomery representation, we multiply by R, which is done + * by Montgomery-multiplying by R^2. Stand-alone conversion back from + * Montgomery representation is Montgomery-multiplication by 1. + */ + +/* + * Precomputed small primes. Each element contains the following: + * + * p The prime itself. + * + * g A primitive root of phi = X^N+1 (in field Z_p). + * + * s The inverse of the product of all previous primes in the array, + * computed modulo p and in Montgomery representation. + * + * All primes are such that p = 1 mod 2048, and are lower than 2^31. They + * are listed in decreasing order. 
+ */ + +typedef struct { + uint32_t p; + uint32_t g; + uint32_t s; +} small_prime; + +static const small_prime PRIMES[] = { + { 2147473409, 383167813, 10239 }, + { 2147389441, 211808905, 471403745 }, + { 2147387393, 37672282, 1329335065 }, + { 2147377153, 1977035326, 968223422 }, + { 2147358721, 1067163706, 132460015 }, + { 2147352577, 1606082042, 598693809 }, + { 2147346433, 2033915641, 1056257184 }, + { 2147338241, 1653770625, 421286710 }, + { 2147309569, 631200819, 1111201074 }, + { 2147297281, 2038364663, 1042003613 }, + { 2147295233, 1962540515, 19440033 }, + { 2147239937, 2100082663, 353296760 }, + { 2147235841, 1991153006, 1703918027 }, + { 2147217409, 516405114, 1258919613 }, + { 2147205121, 409347988, 1089726929 }, + { 2147196929, 927788991, 1946238668 }, + { 2147178497, 1136922411, 1347028164 }, + { 2147100673, 868626236, 701164723 }, + { 2147082241, 1897279176, 617820870 }, + { 2147074049, 1888819123, 158382189 }, + { 2147051521, 25006327, 522758543 }, + { 2147043329, 327546255, 37227845 }, + { 2147039233, 766324424, 1133356428 }, + { 2146988033, 1862817362, 73861329 }, + { 2146963457, 404622040, 653019435 }, + { 2146959361, 1936581214, 995143093 }, + { 2146938881, 1559770096, 634921513 }, + { 2146908161, 422623708, 1985060172 }, + { 2146885633, 1751189170, 298238186 }, + { 2146871297, 578919515, 291810829 }, + { 2146846721, 1114060353, 915902322 }, + { 2146834433, 2069565474, 47859524 }, + { 2146818049, 1552824584, 646281055 }, + { 2146775041, 1906267847, 1597832891 }, + { 2146756609, 1847414714, 1228090888 }, + { 2146744321, 1818792070, 1176377637 }, + { 2146738177, 1118066398, 1054971214 }, + { 2146736129, 52057278, 933422153 }, + { 2146713601, 592259376, 1406621510 }, + { 2146695169, 263161877, 1514178701 }, + { 2146656257, 685363115, 384505091 }, + { 2146650113, 927727032, 537575289 }, + { 2146646017, 52575506, 1799464037 }, + { 2146643969, 1276803876, 1348954416 }, + { 2146603009, 814028633, 1521547704 }, + { 2146572289, 1846678872, 1310832121 }, + 
{ 2146547713, 919368090, 1019041349 }, + { 2146508801, 671847612, 38582496 }, + { 2146492417, 283911680, 532424562 }, + { 2146490369, 1780044827, 896447978 }, + { 2146459649, 327980850, 1327906900 }, + { 2146447361, 1310561493, 958645253 }, + { 2146441217, 412148926, 287271128 }, + { 2146437121, 293186449, 2009822534 }, + { 2146430977, 179034356, 1359155584 }, + { 2146418689, 1517345488, 1790248672 }, + { 2146406401, 1615820390, 1584833571 }, + { 2146404353, 826651445, 607120498 }, + { 2146379777, 3816988, 1897049071 }, + { 2146363393, 1221409784, 1986921567 }, + { 2146355201, 1388081168, 849968120 }, + { 2146336769, 1803473237, 1655544036 }, + { 2146312193, 1023484977, 273671831 }, + { 2146293761, 1074591448, 467406983 }, + { 2146283521, 831604668, 1523950494 }, + { 2146203649, 712865423, 1170834574 }, + { 2146154497, 1764991362, 1064856763 }, + { 2146142209, 627386213, 1406840151 }, + { 2146127873, 1638674429, 2088393537 }, + { 2146099201, 1516001018, 690673370 }, + { 2146093057, 1294931393, 315136610 }, + { 2146091009, 1942399533, 973539425 }, + { 2146078721, 1843461814, 2132275436 }, + { 2146060289, 1098740778, 360423481 }, + { 2146048001, 1617213232, 1951981294 }, + { 2146041857, 1805783169, 2075683489 }, + { 2146019329, 272027909, 1753219918 }, + { 2145986561, 1206530344, 2034028118 }, + { 2145976321, 1243769360, 1173377644 }, + { 2145964033, 887200839, 1281344586 }, + { 2145906689, 1651026455, 906178216 }, + { 2145875969, 1673238256, 1043521212 }, + { 2145871873, 1226591210, 1399796492 }, + { 2145841153, 1465353397, 1324527802 }, + { 2145832961, 1150638905, 554084759 }, + { 2145816577, 221601706, 427340863 }, + { 2145785857, 608896761, 316590738 }, + { 2145755137, 1712054942, 1684294304 }, + { 2145742849, 1302302867, 724873116 }, + { 2145728513, 516717693, 431671476 }, + { 2145699841, 524575579, 1619722537 }, + { 2145691649, 1925625239, 982974435 }, + { 2145687553, 463795662, 1293154300 }, + { 2145673217, 771716636, 881778029 }, + { 2145630209, 1509556977, 
837364988 }, + { 2145595393, 229091856, 851648427 }, + { 2145587201, 1796903241, 635342424 }, + { 2145525761, 715310882, 1677228081 }, + { 2145495041, 1040930522, 200685896 }, + { 2145466369, 949804237, 1809146322 }, + { 2145445889, 1673903706, 95316881 }, + { 2145390593, 806941852, 1428671135 }, + { 2145372161, 1402525292, 159350694 }, + { 2145361921, 2124760298, 1589134749 }, + { 2145359873, 1217503067, 1561543010 }, + { 2145355777, 338341402, 83865711 }, + { 2145343489, 1381532164, 641430002 }, + { 2145325057, 1883895478, 1528469895 }, + { 2145318913, 1335370424, 65809740 }, + { 2145312769, 2000008042, 1919775760 }, + { 2145300481, 961450962, 1229540578 }, + { 2145282049, 910466767, 1964062701 }, + { 2145232897, 816527501, 450152063 }, + { 2145218561, 1435128058, 1794509700 }, + { 2145187841, 33505311, 1272467582 }, + { 2145181697, 269767433, 1380363849 }, + { 2145175553, 56386299, 1316870546 }, + { 2145079297, 2106880293, 1391797340 }, + { 2145021953, 1347906152, 720510798 }, + { 2145015809, 206769262, 1651459955 }, + { 2145003521, 1885513236, 1393381284 }, + { 2144960513, 1810381315, 31937275 }, + { 2144944129, 1306487838, 2019419520 }, + { 2144935937, 37304730, 1841489054 }, + { 2144894977, 1601434616, 157985831 }, + { 2144888833, 98749330, 2128592228 }, + { 2144880641, 1772327002, 2076128344 }, + { 2144864257, 1404514762, 2029969964 }, + { 2144827393, 801236594, 406627220 }, + { 2144806913, 349217443, 1501080290 }, + { 2144796673, 1542656776, 2084736519 }, + { 2144778241, 1210734884, 1746416203 }, + { 2144759809, 1146598851, 716464489 }, + { 2144757761, 286328400, 1823728177 }, + { 2144729089, 1347555695, 1836644881 }, + { 2144727041, 1795703790, 520296412 }, + { 2144696321, 1302475157, 852964281 }, + { 2144667649, 1075877614, 504992927 }, + { 2144573441, 198765808, 1617144982 }, + { 2144555009, 321528767, 155821259 }, + { 2144550913, 814139516, 1819937644 }, + { 2144536577, 571143206, 962942255 }, + { 2144524289, 1746733766, 2471321 }, + { 2144512001, 
1821415077, 124190939 }, + { 2144468993, 917871546, 1260072806 }, + { 2144458753, 378417981, 1569240563 }, + { 2144421889, 175229668, 1825620763 }, + { 2144409601, 1699216963, 351648117 }, + { 2144370689, 1071885991, 958186029 }, + { 2144348161, 1763151227, 540353574 }, + { 2144335873, 1060214804, 919598847 }, + { 2144329729, 663515846, 1448552668 }, + { 2144327681, 1057776305, 590222840 }, + { 2144309249, 1705149168, 1459294624 }, + { 2144296961, 325823721, 1649016934 }, + { 2144290817, 738775789, 447427206 }, + { 2144243713, 962347618, 893050215 }, + { 2144237569, 1655257077, 900860862 }, + { 2144161793, 242206694, 1567868672 }, + { 2144155649, 769415308, 1247993134 }, + { 2144137217, 320492023, 515841070 }, + { 2144120833, 1639388522, 770877302 }, + { 2144071681, 1761785233, 964296120 }, + { 2144065537, 419817825, 204564472 }, + { 2144028673, 666050597, 2091019760 }, + { 2144010241, 1413657615, 1518702610 }, + { 2143952897, 1238327946, 475672271 }, + { 2143940609, 307063413, 1176750846 }, + { 2143918081, 2062905559, 786785803 }, + { 2143899649, 1338112849, 1562292083 }, + { 2143891457, 68149545, 87166451 }, + { 2143885313, 921750778, 394460854 }, + { 2143854593, 719766593, 133877196 }, + { 2143836161, 1149399850, 1861591875 }, + { 2143762433, 1848739366, 1335934145 }, + { 2143756289, 1326674710, 102999236 }, + { 2143713281, 808061791, 1156900308 }, + { 2143690753, 388399459, 1926468019 }, + { 2143670273, 1427891374, 1756689401 }, + { 2143666177, 1912173949, 986629565 }, + { 2143645697, 2041160111, 371842865 }, + { 2143641601, 1279906897, 2023974350 }, + { 2143635457, 720473174, 1389027526 }, + { 2143621121, 1298309455, 1732632006 }, + { 2143598593, 1548762216, 1825417506 }, + { 2143567873, 620475784, 1073787233 }, + { 2143561729, 1932954575, 949167309 }, + { 2143553537, 354315656, 1652037534 }, + { 2143541249, 577424288, 1097027618 }, + { 2143531009, 357862822, 478640055 }, + { 2143522817, 2017706025, 1550531668 }, + { 2143506433, 2078127419, 1824320165 }, + { 
2143488001, 613475285, 1604011510 }, + { 2143469569, 1466594987, 502095196 }, + { 2143426561, 1115430331, 1044637111 }, + { 2143383553, 9778045, 1902463734 }, + { 2143377409, 1557401276, 2056861771 }, + { 2143363073, 652036455, 1965915971 }, + { 2143260673, 1464581171, 1523257541 }, + { 2143246337, 1876119649, 764541916 }, + { 2143209473, 1614992673, 1920672844 }, + { 2143203329, 981052047, 2049774209 }, + { 2143160321, 1847355533, 728535665 }, + { 2143129601, 965558457, 603052992 }, + { 2143123457, 2140817191, 8348679 }, + { 2143100929, 1547263683, 694209023 }, + { 2143092737, 643459066, 1979934533 }, + { 2143082497, 188603778, 2026175670 }, + { 2143062017, 1657329695, 377451099 }, + { 2143051777, 114967950, 979255473 }, + { 2143025153, 1698431342, 1449196896 }, + { 2143006721, 1862741675, 1739650365 }, + { 2142996481, 756660457, 996160050 }, + { 2142976001, 927864010, 1166847574 }, + { 2142965761, 905070557, 661974566 }, + { 2142916609, 40932754, 1787161127 }, + { 2142892033, 1987985648, 675335382 }, + { 2142885889, 797497211, 1323096997 }, + { 2142871553, 2068025830, 1411877159 }, + { 2142861313, 1217177090, 1438410687 }, + { 2142830593, 409906375, 1767860634 }, + { 2142803969, 1197788993, 359782919 }, + { 2142785537, 643817365, 513932862 }, + { 2142779393, 1717046338, 218943121 }, + { 2142724097, 89336830, 416687049 }, + { 2142707713, 5944581, 1356813523 }, + { 2142658561, 887942135, 2074011722 }, + { 2142638081, 151851972, 1647339939 }, + { 2142564353, 1691505537, 1483107336 }, + { 2142533633, 1989920200, 1135938817 }, + { 2142529537, 959263126, 1531961857 }, + { 2142527489, 453251129, 1725566162 }, + { 2142502913, 1536028102, 182053257 }, + { 2142498817, 570138730, 701443447 }, + { 2142416897, 326965800, 411931819 }, + { 2142363649, 1675665410, 1517191733 }, + { 2142351361, 968529566, 1575712703 }, + { 2142330881, 1384953238, 1769087884 }, + { 2142314497, 1977173242, 1833745524 }, + { 2142289921, 95082313, 1714775493 }, + { 2142283777, 109377615, 1070584533 
}, + { 2142277633, 16960510, 702157145 }, + { 2142263297, 553850819, 431364395 }, + { 2142208001, 241466367, 2053967982 }, + { 2142164993, 1795661326, 1031836848 }, + { 2142097409, 1212530046, 712772031 }, + { 2142087169, 1763869720, 822276067 }, + { 2142078977, 644065713, 1765268066 }, + { 2142074881, 112671944, 643204925 }, + { 2142044161, 1387785471, 1297890174 }, + { 2142025729, 783885537, 1000425730 }, + { 2142011393, 905662232, 1679401033 }, + { 2141974529, 799788433, 468119557 }, + { 2141943809, 1932544124, 449305555 }, + { 2141933569, 1527403256, 841867925 }, + { 2141931521, 1247076451, 743823916 }, + { 2141902849, 1199660531, 401687910 }, + { 2141890561, 150132350, 1720336972 }, + { 2141857793, 1287438162, 663880489 }, + { 2141833217, 618017731, 1819208266 }, + { 2141820929, 999578638, 1403090096 }, + { 2141786113, 81834325, 1523542501 }, + { 2141771777, 120001928, 463556492 }, + { 2141759489, 122455485, 2124928282 }, + { 2141749249, 141986041, 940339153 }, + { 2141685761, 889088734, 477141499 }, + { 2141673473, 324212681, 1122558298 }, + { 2141669377, 1175806187, 1373818177 }, + { 2141655041, 1113654822, 296887082 }, + { 2141587457, 991103258, 1585913875 }, + { 2141583361, 1401451409, 1802457360 }, + { 2141575169, 1571977166, 712760980 }, + { 2141546497, 1107849376, 1250270109 }, + { 2141515777, 196544219, 356001130 }, + { 2141495297, 1733571506, 1060744866 }, + { 2141483009, 321552363, 1168297026 }, + { 2141458433, 505818251, 733225819 }, + { 2141360129, 1026840098, 948342276 }, + { 2141325313, 945133744, 2129965998 }, + { 2141317121, 1871100260, 1843844634 }, + { 2141286401, 1790639498, 1750465696 }, + { 2141267969, 1376858592, 186160720 }, + { 2141255681, 2129698296, 1876677959 }, + { 2141243393, 2138900688, 1340009628 }, + { 2141214721, 1933049835, 1087819477 }, + { 2141212673, 1898664939, 1786328049 }, + { 2141202433, 990234828, 940682169 }, + { 2141175809, 1406392421, 993089586 }, + { 2141165569, 1263518371, 289019479 }, + { 2141073409, 1485624211, 
507864514 }, + { 2141052929, 1885134788, 311252465 }, + { 2141040641, 1285021247, 280941862 }, + { 2141028353, 1527610374, 375035110 }, + { 2141011969, 1400626168, 164696620 }, + { 2140999681, 632959608, 966175067 }, + { 2140997633, 2045628978, 1290889438 }, + { 2140993537, 1412755491, 375366253 }, + { 2140942337, 719477232, 785367828 }, + { 2140925953, 45224252, 836552317 }, + { 2140917761, 1157376588, 1001839569 }, + { 2140887041, 278480752, 2098732796 }, + { 2140837889, 1663139953, 924094810 }, + { 2140788737, 802501511, 2045368990 }, + { 2140766209, 1820083885, 1800295504 }, + { 2140764161, 1169561905, 2106792035 }, + { 2140696577, 127781498, 1885987531 }, + { 2140684289, 16014477, 1098116827 }, + { 2140653569, 665960598, 1796728247 }, + { 2140594177, 1043085491, 377310938 }, + { 2140579841, 1732838211, 1504505945 }, + { 2140569601, 302071939, 358291016 }, + { 2140567553, 192393733, 1909137143 }, + { 2140557313, 406595731, 1175330270 }, + { 2140549121, 1748850918, 525007007 }, + { 2140477441, 499436566, 1031159814 }, + { 2140469249, 1886004401, 1029951320 }, + { 2140426241, 1483168100, 1676273461 }, + { 2140420097, 1779917297, 846024476 }, + { 2140413953, 522948893, 1816354149 }, + { 2140383233, 1931364473, 1296921241 }, + { 2140366849, 1917356555, 147196204 }, + { 2140354561, 16466177, 1349052107 }, + { 2140348417, 1875366972, 1860485634 }, + { 2140323841, 456498717, 1790256483 }, + { 2140321793, 1629493973, 150031888 }, + { 2140315649, 1904063898, 395510935 }, + { 2140280833, 1784104328, 831417909 }, + { 2140250113, 256087139, 697349101 }, + { 2140229633, 388553070, 243875754 }, + { 2140223489, 747459608, 1396270850 }, + { 2140200961, 507423743, 1895572209 }, + { 2140162049, 580106016, 2045297469 }, + { 2140149761, 712426444, 785217995 }, + { 2140137473, 1441607584, 536866543 }, + { 2140119041, 346538902, 1740434653 }, + { 2140090369, 282642885, 21051094 }, + { 2140076033, 1407456228, 319910029 }, + { 2140047361, 1619330500, 1488632070 }, + { 2140041217, 
2089408064, 2012026134 }, + { 2140008449, 1705524800, 1613440760 }, + { 2139924481, 1846208233, 1280649481 }, + { 2139906049, 989438755, 1185646076 }, + { 2139867137, 1522314850, 372783595 }, + { 2139842561, 1681587377, 216848235 }, + { 2139826177, 2066284988, 1784999464 }, + { 2139824129, 480888214, 1513323027 }, + { 2139789313, 847937200, 858192859 }, + { 2139783169, 1642000434, 1583261448 }, + { 2139770881, 940699589, 179702100 }, + { 2139768833, 315623242, 964612676 }, + { 2139666433, 331649203, 764666914 }, + { 2139641857, 2118730799, 1313764644 }, + { 2139635713, 519149027, 519212449 }, + { 2139598849, 1526413634, 1769667104 }, + { 2139574273, 551148610, 820739925 }, + { 2139568129, 1386800242, 472447405 }, + { 2139549697, 813760130, 1412328531 }, + { 2139537409, 1615286260, 1609362979 }, + { 2139475969, 1352559299, 1696720421 }, + { 2139455489, 1048691649, 1584935400 }, + { 2139432961, 836025845, 950121150 }, + { 2139424769, 1558281165, 1635486858 }, + { 2139406337, 1728402143, 1674423301 }, + { 2139396097, 1727715782, 1483470544 }, + { 2139383809, 1092853491, 1741699084 }, + { 2139369473, 690776899, 1242798709 }, + { 2139351041, 1768782380, 2120712049 }, + { 2139334657, 1739968247, 1427249225 }, + { 2139332609, 1547189119, 623011170 }, + { 2139310081, 1346827917, 1605466350 }, + { 2139303937, 369317948, 828392831 }, + { 2139301889, 1560417239, 1788073219 }, + { 2139283457, 1303121623, 595079358 }, + { 2139248641, 1354555286, 573424177 }, + { 2139240449, 60974056, 885781403 }, + { 2139222017, 355573421, 1221054839 }, + { 2139215873, 566477826, 1724006500 }, + { 2139150337, 871437673, 1609133294 }, + { 2139144193, 1478130914, 1137491905 }, + { 2139117569, 1854880922, 964728507 }, + { 2139076609, 202405335, 756508944 }, + { 2139062273, 1399715741, 884826059 }, + { 2139045889, 1051045798, 1202295476 }, + { 2139033601, 1707715206, 632234634 }, + { 2139006977, 2035853139, 231626690 }, + { 2138951681, 183867876, 838350879 }, + { 2138945537, 1403254661, 404460202 
}, + { 2138920961, 310865011, 1282911681 }, + { 2138910721, 1328496553, 103472415 }, + { 2138904577, 78831681, 993513549 }, + { 2138902529, 1319697451, 1055904361 }, + { 2138816513, 384338872, 1706202469 }, + { 2138810369, 1084868275, 405677177 }, + { 2138787841, 401181788, 1964773901 }, + { 2138775553, 1850532988, 1247087473 }, + { 2138767361, 874261901, 1576073565 }, + { 2138757121, 1187474742, 993541415 }, + { 2138748929, 1782458888, 1043206483 }, + { 2138744833, 1221500487, 800141243 }, + { 2138738689, 413465368, 1450660558 }, + { 2138695681, 739045140, 342611472 }, + { 2138658817, 1355845756, 672674190 }, + { 2138644481, 608379162, 1538874380 }, + { 2138632193, 1444914034, 686911254 }, + { 2138607617, 484707818, 1435142134 }, + { 2138591233, 539460669, 1290458549 }, + { 2138572801, 2093538990, 2011138646 }, + { 2138552321, 1149786988, 1076414907 }, + { 2138546177, 840688206, 2108985273 }, + { 2138533889, 209669619, 198172413 }, + { 2138523649, 1975879426, 1277003968 }, + { 2138490881, 1351891144, 1976858109 }, + { 2138460161, 1817321013, 1979278293 }, + { 2138429441, 1950077177, 203441928 }, + { 2138400769, 908970113, 628395069 }, + { 2138398721, 219890864, 758486760 }, + { 2138376193, 1306654379, 977554090 }, + { 2138351617, 298822498, 2004708503 }, + { 2138337281, 441457816, 1049002108 }, + { 2138320897, 1517731724, 1442269609 }, + { 2138290177, 1355911197, 1647139103 }, + { 2138234881, 531313247, 1746591962 }, + { 2138214401, 1899410930, 781416444 }, + { 2138202113, 1813477173, 1622508515 }, + { 2138191873, 1086458299, 1025408615 }, + { 2138183681, 1998800427, 827063290 }, + { 2138173441, 1921308898, 749670117 }, + { 2138103809, 1620902804, 2126787647 }, + { 2138099713, 828647069, 1892961817 }, + { 2138085377, 179405355, 1525506535 }, + { 2138060801, 615683235, 1259580138 }, + { 2138044417, 2030277840, 1731266562 }, + { 2138042369, 2087222316, 1627902259 }, + { 2138032129, 126388712, 1108640984 }, + { 2138011649, 715026550, 1017980050 }, + { 2137993217, 
1693714349, 1351778704 }, + { 2137888769, 1289762259, 1053090405 }, + { 2137853953, 199991890, 1254192789 }, + { 2137833473, 941421685, 896995556 }, + { 2137817089, 750416446, 1251031181 }, + { 2137792513, 798075119, 368077456 }, + { 2137786369, 878543495, 1035375025 }, + { 2137767937, 9351178, 1156563902 }, + { 2137755649, 1382297614, 1686559583 }, + { 2137724929, 1345472850, 1681096331 }, + { 2137704449, 834666929, 630551727 }, + { 2137673729, 1646165729, 1892091571 }, + { 2137620481, 778943821, 48456461 }, + { 2137618433, 1730837875, 1713336725 }, + { 2137581569, 805610339, 1378891359 }, + { 2137538561, 204342388, 1950165220 }, + { 2137526273, 1947629754, 1500789441 }, + { 2137516033, 719902645, 1499525372 }, + { 2137491457, 230451261, 556382829 }, + { 2137440257, 979573541, 412760291 }, + { 2137374721, 927841248, 1954137185 }, + { 2137362433, 1243778559, 861024672 }, + { 2137313281, 1341338501, 980638386 }, + { 2137311233, 937415182, 1793212117 }, + { 2137255937, 795331324, 1410253405 }, + { 2137243649, 150756339, 1966999887 }, + { 2137182209, 163346914, 1939301431 }, + { 2137171969, 1952552395, 758913141 }, + { 2137159681, 570788721, 218668666 }, + { 2137147393, 1896656810, 2045670345 }, + { 2137141249, 358493842, 518199643 }, + { 2137139201, 1505023029, 674695848 }, + { 2137133057, 27911103, 830956306 }, + { 2137122817, 439771337, 1555268614 }, + { 2137116673, 790988579, 1871449599 }, + { 2137110529, 432109234, 811805080 }, + { 2137102337, 1357900653, 1184997641 }, + { 2137098241, 515119035, 1715693095 }, + { 2137090049, 408575203, 2085660657 }, + { 2137085953, 2097793407, 1349626963 }, + { 2137055233, 1556739954, 1449960883 }, + { 2137030657, 1545758650, 1369303716 }, + { 2136987649, 332602570, 103875114 }, + { 2136969217, 1499989506, 1662964115 }, + { 2136924161, 857040753, 4738842 }, + { 2136895489, 1948872712, 570436091 }, + { 2136893441, 58969960, 1568349634 }, + { 2136887297, 2127193379, 273612548 }, + { 2136850433, 111208983, 1181257116 }, + { 
2136809473, 1627275942, 1680317971 }, + { 2136764417, 1574888217, 14011331 }, + { 2136741889, 14011055, 1129154251 }, + { 2136727553, 35862563, 1838555253 }, + { 2136721409, 310235666, 1363928244 }, + { 2136698881, 1612429202, 1560383828 }, + { 2136649729, 1138540131, 800014364 }, + { 2136606721, 602323503, 1433096652 }, + { 2136563713, 182209265, 1919611038 }, + { 2136555521, 324156477, 165591039 }, + { 2136549377, 195513113, 217165345 }, + { 2136526849, 1050768046, 939647887 }, + { 2136508417, 1886286237, 1619926572 }, + { 2136477697, 609647664, 35065157 }, + { 2136471553, 679352216, 1452259468 }, + { 2136457217, 128630031, 824816521 }, + { 2136422401, 19787464, 1526049830 }, + { 2136420353, 698316836, 1530623527 }, + { 2136371201, 1651862373, 1804812805 }, + { 2136334337, 326596005, 336977082 }, + { 2136322049, 63253370, 1904972151 }, + { 2136297473, 312176076, 172182411 }, + { 2136248321, 381261841, 369032670 }, + { 2136242177, 358688773, 1640007994 }, + { 2136229889, 512677188, 75585225 }, + { 2136219649, 2095003250, 1970086149 }, + { 2136207361, 1909650722, 537760675 }, + { 2136176641, 1334616195, 1533487619 }, + { 2136158209, 2096285632, 1793285210 }, + { 2136143873, 1897347517, 293843959 }, + { 2136133633, 923586222, 1022655978 }, + { 2136096769, 1464868191, 1515074410 }, + { 2136094721, 2020679520, 2061636104 }, + { 2136076289, 290798503, 1814726809 }, + { 2136041473, 156415894, 1250757633 }, + { 2135996417, 297459940, 1132158924 }, + { 2135955457, 538755304, 1688831340 }, + { 0, 0, 0 } +}; + +/* + * Reduce a small signed integer modulo a small prime. The source + * value x MUST be such that -p < x < p. + */ +static inline uint32_t +modp_set(int32_t x, uint32_t p) { + uint32_t w; + + w = (uint32_t)x; + w += p & -(w >> 31); + return w; +} + +/* + * Normalize a modular integer around 0. + */ +static inline int32_t +modp_norm(uint32_t x, uint32_t p) { + return (int32_t)(x - (p & (((x - ((p + 1) >> 1)) >> 31) - 1))); +} + +/* + * Compute -1/p mod 2^31. 
/*
 * Compute -1/p mod 2^31. This works for all odd integers p that fit
 * on 31 bits.
 *
 * The starting value 2 - p is an inverse of p modulo 4. Each of the
 * four Newton steps y <- y*(2 - p*y) doubles the number of correct low
 * bits (4, 8, 16, then 32), so y ends as 1/p mod 2^32; the result is
 * its negation truncated to 31 bits.
 */
static uint32_t
modp_ninv31(uint32_t p) {
    uint32_t y;

    y = 2 - p;
    y *= 2 - p * y;
    y *= 2 - p * y;
    y *= 2 - p * y;
    y *= 2 - p * y;
    return (uint32_t)0x7FFFFFFF & -y;
}

/*
 * Compute R = 2^31 mod p.
 */
static inline uint32_t
modp_R(uint32_t p) {
    /*
     * Since 2^30 < p < 2^31, we know that 2^31 mod p is simply
     * 2^31 - p.
     */
    return ((uint32_t)1 << 31) - p;
}

/*
 * Addition modulo p. Both operands must already be in the 0..p-1 range.
 */
static inline uint32_t
modp_add(uint32_t a, uint32_t b, uint32_t p) {
    uint32_t d;

    d = a + b - p;
    /* The top bit is set exactly when a + b < p: add p back, branchlessly. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Subtraction modulo p. Both operands must already be in the 0..p-1
 * range.
 */
static inline uint32_t
modp_sub(uint32_t a, uint32_t b, uint32_t p) {
    uint32_t d;

    d = a - b;
    /* The top bit is set exactly when a < b: add p back, branchlessly. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Halving modulo p.
 */
/* unused
static inline uint32_t
modp_half(uint32_t a, uint32_t p)
{
    a += p & -(a & 1);
    return a >> 1;
}
*/

/*
 * Montgomery multiplication modulo p: returns a*b/2^31 mod p. The 'p0i'
 * value is -1/p mod 2^31. It is required that p is an odd integer.
 */
static inline uint32_t
modp_montymul(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i) {
    uint64_t z, w;
    uint32_t d;

    z = (uint64_t)a * (uint64_t)b;
    /* w is the multiple of p that clears the low 31 bits of z + w. */
    w = ((z * p0i) & (uint64_t)0x7FFFFFFF) * p;
    d = (uint32_t)((z + w) >> 31) - p;
    d += p & -(d >> 31);
    return d;
}

/*
 * Compute R2 = 2^62 mod p.
 */
static uint32_t
modp_R2(uint32_t p, uint32_t p0i) {
    uint32_t z;

    /*
     * Compute z = 2^31 mod p (this is the value 1 in Montgomery
     * representation), then double it with an addition.
     */
    z = modp_R(p);
    z = modp_add(z, z, p);

    /*
     * Square it five times to obtain 2^32 in Montgomery representation
     * (i.e. 2^63 mod p).
     */
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);

    /*
     * Halve the value mod p to get 2^62.
     */
    z = (z + (p & -(z & 1))) >> 1;
    return z;
}

/*
 * Compute 2^(31*x) modulo p.
 * p must be prime such that 2^30 < p < 2^31; p0i must be equal to
 * -1/p mod 2^31; R2 must be equal to 2^62 mod p.
 *
 * x = 0 yields 1. The exponent x is treated as public data: the number
 * of loop iterations and the multiplications performed depend on it.
 */
static inline uint32_t
modp_Rx(unsigned x, uint32_t p, uint32_t p0i, uint32_t R2) {
    unsigned e;
    uint32_t r, z;

    /*
     * 2^0 = 1. Handling this case up front also keeps the exponent
     * x - 1 below from wrapping around.
     */
    if (x == 0) {
        return 1;
    }

    /*
     * 2^(31*x) = (2^31)*(2^(31*(x-1))); i.e. we want the Montgomery
     * representation of (2^31)^e mod p, where e = x-1.
     * R2 is 2^31 in Montgomery representation.
     *
     * Square-and-multiply, consuming e from its least significant bit.
     * Shifting e itself (rather than a mask by the bit index) never
     * uses a shift count that reaches the width of the type.
     */
    r = R2;
    z = modp_R(p);
    for (e = x - 1; e != 0; e >>= 1) {
        if ((e & 1U) != 0) {
            z = modp_montymul(z, r, p, p0i);
        }
        r = modp_montymul(r, r, p, p0i);
    }
    return z;
}

/*
 * Division modulo p. If the divisor (b) is 0, then 0 is returned.
 * This function computes proper results only when p is prime.
 * Parameters:
 *   a     dividend
 *   b     divisor
 *   p     odd prime modulus
 *   p0i   -1/p mod 2^31
 *   R     2^31 mod p
 */
static uint32_t
modp_div(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i, uint32_t R) {
    uint32_t z, e;
    int i;

    /* Inversion by exponentiation: b^(p-2), scanning e from bit 30 down. */
    e = p - 2;
    z = R;
    for (i = 30; i >= 0; i --) {
        uint32_t z2;

        z = modp_montymul(z, z, p, p0i);
        z2 = modp_montymul(z, b, p, p0i);
        /* Branchless select: keep z2 when bit i of e is set, z otherwise. */
        z ^= (z ^ z2) & -(uint32_t)((e >> i) & 1);
    }

    /*
     * The loop above just assumed that b was in Montgomery
     * representation, i.e. really contained b*R; under that
     * assumption, it returns 1/b in Montgomery representation,
     * which is R/b. But we gave it b in normal representation,
     * so the loop really returned R/(b/R) = R^2/b.
     *
     * We want a/b, so we need one Montgomery multiplication with a,
     * which also removes one of the R factors, and another such
     * multiplication to remove the second R factor.
     */
    z = modp_montymul(z, 1, p, p0i);
    return modp_montymul(a, z, p, p0i);
}

/*
 * Bit-reversal index table.
 */
+ */ +static const uint16_t REV10[] = { + 0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, + 192, 704, 448, 960, 32, 544, 288, 800, 160, 672, 416, 928, + 96, 608, 352, 864, 224, 736, 480, 992, 16, 528, 272, 784, + 144, 656, 400, 912, 80, 592, 336, 848, 208, 720, 464, 976, + 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880, + 240, 752, 496, 1008, 8, 520, 264, 776, 136, 648, 392, 904, + 72, 584, 328, 840, 200, 712, 456, 968, 40, 552, 296, 808, + 168, 680, 424, 936, 104, 616, 360, 872, 232, 744, 488, 1000, + 24, 536, 280, 792, 152, 664, 408, 920, 88, 600, 344, 856, + 216, 728, 472, 984, 56, 568, 312, 824, 184, 696, 440, 952, + 120, 632, 376, 888, 248, 760, 504, 1016, 4, 516, 260, 772, + 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, + 36, 548, 292, 804, 164, 676, 420, 932, 100, 612, 356, 868, + 228, 740, 484, 996, 20, 532, 276, 788, 148, 660, 404, 916, + 84, 596, 340, 852, 212, 724, 468, 980, 52, 564, 308, 820, + 180, 692, 436, 948, 116, 628, 372, 884, 244, 756, 500, 1012, + 12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332, 844, + 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, + 108, 620, 364, 876, 236, 748, 492, 1004, 28, 540, 284, 796, + 156, 668, 412, 924, 92, 604, 348, 860, 220, 732, 476, 988, + 60, 572, 316, 828, 188, 700, 444, 956, 124, 636, 380, 892, + 252, 764, 508, 1020, 2, 514, 258, 770, 130, 642, 386, 898, + 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290, 802, + 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994, + 18, 530, 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, + 210, 722, 466, 978, 50, 562, 306, 818, 178, 690, 434, 946, + 114, 626, 370, 882, 242, 754, 498, 1010, 10, 522, 266, 778, + 138, 650, 394, 906, 74, 586, 330, 842, 202, 714, 458, 970, + 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362, 874, + 234, 746, 490, 1002, 26, 538, 282, 794, 154, 666, 410, 922, + 90, 602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, + 186, 698, 442, 954, 122, 634, 378, 890, 250, 762, 506, 1018, + 6, 
518, 262, 774, 134, 646, 390, 902, 70, 582, 326, 838, + 198, 710, 454, 966, 38, 550, 294, 806, 166, 678, 422, 934, + 102, 614, 358, 870, 230, 742, 486, 998, 22, 534, 278, 790, + 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, + 54, 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, + 246, 758, 502, 1014, 14, 526, 270, 782, 142, 654, 398, 910, + 78, 590, 334, 846, 206, 718, 462, 974, 46, 558, 302, 814, + 174, 686, 430, 942, 110, 622, 366, 878, 238, 750, 494, 1006, + 30, 542, 286, 798, 158, 670, 414, 926, 94, 606, 350, 862, + 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446, 958, + 126, 638, 382, 894, 254, 766, 510, 1022, 1, 513, 257, 769, + 129, 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, + 33, 545, 289, 801, 161, 673, 417, 929, 97, 609, 353, 865, + 225, 737, 481, 993, 17, 529, 273, 785, 145, 657, 401, 913, + 81, 593, 337, 849, 209, 721, 465, 977, 49, 561, 305, 817, + 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009, + 9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, + 201, 713, 457, 969, 41, 553, 297, 809, 169, 681, 425, 937, + 105, 617, 361, 873, 233, 745, 489, 1001, 25, 537, 281, 793, + 153, 665, 409, 921, 89, 601, 345, 857, 217, 729, 473, 985, + 57, 569, 313, 825, 185, 697, 441, 953, 121, 633, 377, 889, + 249, 761, 505, 1017, 5, 517, 261, 773, 133, 645, 389, 901, + 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, + 165, 677, 421, 933, 101, 613, 357, 869, 229, 741, 485, 997, + 21, 533, 277, 789, 149, 661, 405, 917, 85, 597, 341, 853, + 213, 725, 469, 981, 53, 565, 309, 821, 181, 693, 437, 949, + 117, 629, 373, 885, 245, 757, 501, 1013, 13, 525, 269, 781, + 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461, 973, + 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, + 237, 749, 493, 1005, 29, 541, 285, 797, 157, 669, 413, 925, + 93, 605, 349, 861, 221, 733, 477, 989, 61, 573, 317, 829, + 189, 701, 445, 957, 125, 637, 381, 893, 253, 765, 509, 1021, + 3, 515, 259, 771, 131, 643, 387, 899, 67, 579, 
323, 835, + 195, 707, 451, 963, 35, 547, 291, 803, 163, 675, 419, 931, + 99, 611, 355, 867, 227, 739, 483, 995, 19, 531, 275, 787, + 147, 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, + 51, 563, 307, 819, 179, 691, 435, 947, 115, 627, 371, 883, + 243, 755, 499, 1011, 11, 523, 267, 779, 139, 651, 395, 907, + 75, 587, 331, 843, 203, 715, 459, 971, 43, 555, 299, 811, + 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491, 1003, + 27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, + 219, 731, 475, 987, 59, 571, 315, 827, 187, 699, 443, 955, + 123, 635, 379, 891, 251, 763, 507, 1019, 7, 519, 263, 775, + 135, 647, 391, 903, 71, 583, 327, 839, 199, 711, 455, 967, + 39, 551, 295, 807, 167, 679, 423, 935, 103, 615, 359, 871, + 231, 743, 487, 999, 23, 535, 279, 791, 151, 663, 407, 919, + 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, + 183, 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015, + 15, 527, 271, 783, 143, 655, 399, 911, 79, 591, 335, 847, + 207, 719, 463, 975, 47, 559, 303, 815, 175, 687, 431, 943, + 111, 623, 367, 879, 239, 751, 495, 1007, 31, 543, 287, 799, + 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479, 991, + 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, + 255, 767, 511, 1023 +}; + +/* + * Compute the roots for NTT and inverse NTT (binary case). Input + * parameter g is a primitive 2048-th root of 1 modulo p (i.e. g^1024 = + * -1 mod p). This fills gm[] and igm[] with powers of g and 1/g: + * gm[rev(i)] = g^i mod p + * igm[rev(i)] = (1/g)^i mod p + * where rev() is the "bit reversal" function over 10 bits. It fills + * the arrays only up to N = 2^logn values. + * + * The values stored in gm[] and igm[] are in Montgomery representation. + * + * p must be a prime such that p = 1 mod 2048. 
+ */ +static void +modp_mkgm2(uint32_t *gm, uint32_t *igm, unsigned logn, + uint32_t g, uint32_t p, uint32_t p0i) { + size_t u, n; + unsigned k; + uint32_t ig, x1, x2, R2; + + n = (size_t)1 << logn; + + /* + * We want g such that g^(2N) = 1 mod p, but the provided + * generator has order 2048. We must square it a few times. + */ + R2 = modp_R2(p, p0i); + g = modp_montymul(g, R2, p, p0i); + for (k = logn; k < 10; k ++) { + g = modp_montymul(g, g, p, p0i); + } + + ig = modp_div(R2, g, p, p0i, modp_R(p)); + k = 10 - logn; + x1 = x2 = modp_R(p); + for (u = 0; u < n; u ++) { + size_t v; + + v = REV10[u << k]; + gm[v] = x1; + igm[v] = x2; + x1 = modp_montymul(x1, g, p, p0i); + x2 = modp_montymul(x2, ig, p, p0i); + } +} + +/* + * Compute the NTT over a polynomial (binary case). Polynomial elements + * are a[0], a[stride], a[2 * stride]... + */ +static void +modp_NTT2_ext(uint32_t *a, size_t stride, const uint32_t *gm, unsigned logn, + uint32_t p, uint32_t p0i) { + size_t t, m, n; + + if (logn == 0) { + return; + } + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, u, v1; + + ht = t >> 1; + for (u = 0, v1 = 0; u < m; u ++, v1 += t) { + uint32_t s; + size_t v; + uint32_t *r1, *r2; + + s = gm[m + u]; + r1 = a + v1 * stride; + r2 = r1 + ht * stride; + for (v = 0; v < ht; v ++, r1 += stride, r2 += stride) { + uint32_t x, y; + + x = *r1; + y = modp_montymul(*r2, s, p, p0i); + *r1 = modp_add(x, y, p); + *r2 = modp_sub(x, y, p); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT over a polynomial (binary case). 
/*
 * Inverse NTT of a polynomial (binary case), computed in place. The
 * coefficients are a[0], a[stride], a[2 * stride]...; igm[] is the
 * table of inverse roots for the same logn, p and p0i. Nothing is done
 * when logn is 0.
 */
static void
modp_iNTT2_ext(uint32_t *a, size_t stride, const uint32_t *igm, unsigned logn,
               uint32_t p, uint32_t p0i) {
    size_t n, groups, span, i;
    uint32_t inv_n;

    if (logn == 0) {
        return;
    }
    n = (size_t)1 << logn;

    /*
     * Each pass halves the number of groups and doubles the distance
     * ('span') between the two inputs of a butterfly.
     */
    for (groups = n >> 1, span = 1; groups > 0; groups >>= 1, span <<= 1) {
        size_t g;

        for (g = 0; g < groups; g ++) {
            const uint32_t w = igm[groups + g];
            uint32_t *lo = a + (g * (span << 1)) * stride;
            uint32_t *hi = lo + span * stride;
            size_t j;

            for (j = 0; j < span; j ++) {
                const uint32_t x = lo[j * stride];
                const uint32_t y = hi[j * stride];

                lo[j * stride] = modp_add(x, y, p);
                hi[j * stride] = modp_montymul(
                                     modp_sub(x, y, p), w, p, p0i);
            }
        }
    }

    /*
     * We need 1/n in Montgomery representation, i.e. R/n. Since
     * 1 <= logn <= 10, R/n is an integer; moreover, R/n <= 2^30 < p,
     * thus a simple shift will do.
     */
    inv_n = (uint32_t)1 << (31 - logn);
    for (i = 0; i < n; i ++) {
        a[i * stride] = modp_montymul(a[i * stride], inv_n, p, p0i);
    }
}

/*
 * Simplified macros for NTT and iNTT (binary case) when the elements
 * are consecutive in RAM.
 */
#define modp_NTT2(a, gm, logn, p, p0i)   modp_NTT2_ext(a, 1, gm, logn, p, p0i)
#define modp_iNTT2(a, igm, logn, p, p0i) modp_iNTT2_ext(a, 1, igm, logn, p, p0i)

/*
 * Given polynomial f in NTT representation modulo p, compute f' of degree
 * less than N/2 such that f' = f0^2 - X*f1^2, where f0 and f1 are
 * polynomials of degree less than N/2 such that f = f0(X^2) + X*f1(X^2).
 *
 * The new polynomial is written "in place" over the first N/2 elements
 * of f.
 *
 * If applied logn times successively on a given polynomial, the resulting
 * degree-0 polynomial is the resultant of f and X^N+1 modulo p.
 *
 * This function applies only to the binary case; it is invoked from
 * solve_NTRU_binary_depth1().
 */
/*
 * One halving step of the resultant computation, on a polynomial f
 * given in NTT representation modulo p (binary case).
 *
 *   f      N = 2^logn values; the first N/2 are overwritten
 *   logn   log2 of the current size; nothing is done when it is 0
 *   p      modulus
 *   p0i    -1/p mod 2^31
 *   R2     2^62 mod p
 *
 * Output element u is the product f[2u] * f[2u+1] mod p. The second
 * Montgomery multiplication, by R2, cancels the 1/2^31 factor left by
 * the first one, so the output uses the same representation as the
 * input.
 */
static void
modp_poly_rec_res(uint32_t *f, unsigned logn,
                  uint32_t p, uint32_t p0i, uint32_t R2) {
    size_t hn, u;

    /*
     * A single value has no pair to combine. Returning here also keeps
     * the shift count below from wrapping around.
     */
    if (logn == 0) {
        return;
    }
    hn = (size_t)1 << (logn - 1);
    for (u = 0; u < hn; u ++) {
        uint32_t w0, w1;

        w0 = f[(u << 1) + 0];
        w1 = f[(u << 1) + 1];
        f[u] = modp_montymul(modp_montymul(w0, w1, p, p0i), R2, p, p0i);
    }
}

/* ==================================================================== */
/*
 * Custom bignum implementation.
 *
 * This is a very reduced set of functionalities. We need to do the
 * following operations:
 *
 *  - Rebuild the resultant and the polynomial coefficients from their
 *    values modulo small primes (of length 31 bits each).
 *
 *  - Compute an extended GCD between the two computed resultants.
 *
 *  - Extract top bits and add scaled values during the successive steps
 *    of Babai rounding.
 *
 * When rebuilding values using CRT, we must also recompute the product
 * of the small prime factors. We always do it one small factor at a
 * time, so the "complicated" operations can be done modulo the small
 * prime with the modp_* functions. CRT coefficients (inverses) are
 * precomputed.
 *
 * All values are positive until the last step: when the polynomial
 * coefficients have been rebuilt, we normalize them around 0. But then,
 * only additions and subtractions on the upper few bits are needed
 * afterwards.
 *
 * We keep big integers as arrays of 31-bit words (in uint32_t values);
 * the top bit of each uint32_t is kept equal to 0. Using 31-bit words
 * makes it easier to keep track of carries. When negative values are
 * used, two's complement is used.
 */

/*
 * Subtract integer b from integer a. Both integers are supposed to have
 * the same size. The carry (0 or 1) is returned. Source arrays a and b
 * MUST be distinct.
 *
 * The operation is performed as described above if ctl = 1.
 */
If + * ctl = 0, the value a[] is unmodified, but all memory accesses are + * still performed, and the carry is computed and returned. + */ +static uint32_t +zint_sub(uint32_t *a, const uint32_t *b, size_t len, + uint32_t ctl) { + size_t u; + uint32_t cc, m; + + cc = 0; + m = -ctl; /* all-ones mask when ctl = 1, zero when ctl = 0 */ + for (u = 0; u < len; u ++) { + uint32_t aw, w; + + aw = a[u]; + w = aw - b[u] - cc; + cc = w >> 31; + aw ^= ((w & 0x7FFFFFFF) ^ aw) & m; /* masked select: aw becomes the 31-bit difference only when m is all-ones */ + a[u] = aw; + } + return cc; +} + +/* + * Multiply the provided big integer m with a small value x. + * This function assumes that x < 2^31. The carry word is returned. + */ +static uint32_t +zint_mul_small(uint32_t *m, size_t mlen, uint32_t x) { + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < mlen; u ++) { + uint64_t z; + + z = (uint64_t)m[u] * (uint64_t)x + cc; + m[u] = (uint32_t)z & 0x7FFFFFFF; + cc = (uint32_t)(z >> 31); + } + return cc; +} + +/* + * Reduce a big integer d modulo a small integer p. + * Rules: + * d is unsigned + * p is prime + * 2^30 < p < 2^31 + * p0i = -(1/p) mod 2^31 + * R2 = 2^62 mod p + */ +static uint32_t +zint_mod_small_unsigned(const uint32_t *d, size_t dlen, + uint32_t p, uint32_t p0i, uint32_t R2) { + uint32_t x; + size_t u; + + /* + * Algorithm: we inject words one by one, starting with the high + * word. Each step is: + * - multiply x by 2^31 + * - add new word + */ + x = 0; + u = dlen; + while (u -- > 0) { + uint32_t w; + + x = modp_montymul(x, R2, p, p0i); + w = d[u] - p; + w += p & -(w >> 31); /* p is added back when the subtraction wrapped, so w ends up below p */ + x = modp_add(x, w, p); + } + return x; +} + +/* + * Similar to zint_mod_small_unsigned(), except that d may be signed. + * Extra parameter is Rx = 2^(31*dlen) mod p. + */ +static uint32_t +zint_mod_small_signed(const uint32_t *d, size_t dlen, + uint32_t p, uint32_t p0i, uint32_t R2, uint32_t Rx) { + uint32_t z; + + if (dlen == 0) { + return 0; + } + z = zint_mod_small_unsigned(d, dlen, p, p0i, R2); + z = modp_sub(z, Rx & -(d[dlen - 1] >> 30), p); /* Rx is subtracted only when bit 30 of the top word (the sign bit) is set */ + return z; +} + +/* + * Add y*s to x.
x and y initially have length 'len' words; the new x + * has length 'len+1' words. 's' must fit on 31 bits. x[] and y[] must + * not overlap. + */ +static void +zint_add_mul_small(uint32_t *x, + const uint32_t *y, size_t len, uint32_t s) { + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < len; u ++) { + uint32_t xw, yw; + uint64_t z; + + xw = x[u]; + yw = y[u]; + z = (uint64_t)yw * (uint64_t)s + (uint64_t)xw + (uint64_t)cc; + x[u] = (uint32_t)z & 0x7FFFFFFF; + cc = (uint32_t)(z >> 31); + } + x[len] = cc; /* the final carry becomes the extra top word */ +} + +/* + * Normalize a modular integer around 0: if x > p/2, then x is replaced + * with x - p (signed encoding with two's complement); otherwise, x is + * untouched. The two integers x and p are encoded over the same length. + */ +static void +zint_norm_zero(uint32_t *x, const uint32_t *p, size_t len) { + size_t u; + uint32_t r, bb; + + /* + * Compare x with p/2. We use the shifted version of p, and p + * is odd, so we really compare with (p-1)/2; we want to perform + * the subtraction if and only if x > (p-1)/2. + */ + r = 0; + bb = 0; + u = len; + while (u -- > 0) { + uint32_t wx, wp, cc; + + /* + * Get the two words to compare in wx and wp (both over + * 31 bits exactly). + */ + wx = x[u]; + wp = (p[u] >> 1) | (bb << 30); + bb = p[u] & 1; /* low bit of this word is shifted into the next (lower) word */ + + /* + * We set cc to -1, 0 or 1, depending on whether wp is + * lower than, equal to, or greater than wx. + */ + cc = wp - wx; + cc = ((-cc) >> 31) | -(cc >> 31); + + /* + * If r != 0 then it is either 1 or -1, and we keep its + * value. Otherwise, if r = 0, then we replace it with cc. + */ + r |= cc & ((r & 1) - 1); + } + + /* + * At this point, r = -1, 0 or 1, depending on whether (p-1)/2 + * is lower than, equal to, or greater than x. We thus want to + * do the subtraction only if r = -1. + */ + zint_sub(x, p, len, r >> 31); /* r >> 31 is 1 only when r = -1 */ +} + +/* + * Rebuild integers from their RNS representation. There are 'num' + * integers, and each consists in 'xlen' words.
'xx' points at that + * first word of the first integer; subsequent integers are accessed + * by adding 'xstride' repeatedly. + * + * The words of an integer are the RNS representation of that integer, + * using the provided 'primes' are moduli. This function replaces + * each integer with its multi-word value (little-endian order). + * + * If "normalize_signed" is non-zero, then the returned value is + * normalized to the -m/2..m/2 interval (where m is the product of all + * small prime moduli); two's complement is used for negative values. + */ +static void +zint_rebuild_CRT(uint32_t *xx, size_t xlen, size_t xstride, + size_t num, const small_prime *primes, int normalize_signed, + uint32_t *tmp) { + size_t u; + uint32_t *x; + + tmp[0] = primes[0].p; /* the running product of the moduli starts with the first prime */ + for (u = 1; u < xlen; u ++) { + /* + * At the entry of each loop iteration: + * - the first u words of each array have been + * reassembled; + * - the first u words of tmp[] contain the + * product of the prime moduli processed so far. + * + * We call 'q' the product of all previous primes. + */ + uint32_t p, p0i, s, R2; + size_t v; + + p = primes[u].p; + s = primes[u].s; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + for (v = 0, x = xx; v < num; v ++, x += xstride) { + uint32_t xp, xq, xr; + /* + * xp = the integer x modulo the prime p for this + * iteration + * xq = (x mod q) mod p + */ + xp = x[u]; + xq = zint_mod_small_unsigned(x, u, p, p0i, R2); + + /* + * New value is (x mod q) + q * (s * (xp - xq) mod p) + */ + xr = modp_montymul(s, modp_sub(xp, xq, p), p, p0i); + zint_add_mul_small(x, tmp, u, xr); /* writes x[0..u], overwriting the residue that was stored in x[u] */ + } + + /* + * Update product of primes in tmp[]. + */ + tmp[u] = zint_mul_small(tmp, u, p); /* the carry word extends the product to u+1 words */ + } + + /* + * Normalize the reconstructed values around 0. + */ + if (normalize_signed) { + for (u = 0, x = xx; u < num; u ++, x += xstride) { + zint_norm_zero(x, tmp, xlen); + } + } +} + +/* + * Negate a big integer conditionally: value a is replaced with -a if + * and only if ctl = 1.
Control value ctl must be 0 or 1. + */ +static void +zint_negate(uint32_t *a, size_t len, uint32_t ctl) { + size_t u; + uint32_t cc, m; + + /* + * If ctl = 1 then we flip the bits of a by XORing with + * 0x7FFFFFFF, and we add 1 to the value. If ctl = 0 then we XOR + * with 0 and add 0, which leaves the value unchanged. + */ + cc = ctl; + m = -ctl >> 1; /* 0x7FFFFFFF when ctl = 1, 0 when ctl = 0 */ + for (u = 0; u < len; u ++) { + uint32_t aw; + + aw = a[u]; + aw = (aw ^ m) + cc; + a[u] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Replace a with (a*xa+b*xb)/(2^31) and b with (a*ya+b*yb)/(2^31). + * The low bits are dropped (the caller should compute the coefficients + * such that these dropped bits are all zeros). If either or both + * yields a negative value, then the value is negated. + * + * Returned value is: + * 0 both values were positive + * 1 new a had to be negated + * 2 new b had to be negated + * 3 both new a and new b had to be negated + * + * Coefficients xa, xb, ya and yb may use the full signed 32-bit range. len must be at least 1: a[len - 1] and b[len - 1] are always written. + */ +static uint32_t +zint_co_reduce(uint32_t *a, uint32_t *b, size_t len, + int64_t xa, int64_t xb, int64_t ya, int64_t yb) { + size_t u; + int64_t cca, ccb; + uint32_t nega, negb; + + cca = 0; + ccb = 0; + for (u = 0; u < len; u ++) { + uint32_t wa, wb; + uint64_t za, zb; + + wa = a[u]; + wb = b[u]; + za = wa * (uint64_t)xa + wb * (uint64_t)xb + (uint64_t)cca; + zb = wa * (uint64_t)ya + wb * (uint64_t)yb + (uint64_t)ccb; + if (u > 0) { /* results are stored one word lower: the low word of the sum is dropped */ + a[u - 1] = (uint32_t)za & 0x7FFFFFFF; + b[u - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + cca = *(int64_t *)&za >> 31; /* NOTE(review): relies on right shift of a negative int64_t being arithmetic, which is implementation-defined in C */ + ccb = *(int64_t *)&zb >> 31; + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + nega = (uint32_t)((uint64_t)cca >> 63); /* sign bit of the final carry */ + negb = (uint32_t)((uint64_t)ccb >> 63); + zint_negate(a, len, nega); + zint_negate(b, len, negb); + return nega | (negb << 1); +} + +/* + * Finish modular reduction.
Rules on input parameters: + * + * if neg = 1, then -m <= a < 0 + * if neg = 0, then 0 <= a < 2*m + * + * If neg = 0, then the top word of a[] is allowed to use 32 bits. + * + * Modulus m must be odd. + */ +static void +zint_finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg) { + size_t u; + uint32_t cc, xm, ym; + + /* + * First pass: compare a (assumed nonnegative) with m. Note that + * if the top word uses 32 bits, subtracting m must yield a + * value less than 2^31 since a < 2*m. + */ + cc = 0; + for (u = 0; u < len; u ++) { + cc = (a[u] - m[u] - cc) >> 31; /* only the borrow is kept; a[] is not modified in this pass */ + } + + /* + * If neg = 1 then we must add m (regardless of cc) + * If neg = 0 and cc = 0 then we must subtract m + * If neg = 0 and cc = 1 then we must do nothing + * + * In the loop below, we conditionally subtract either m or -m + * from a. Word mw is a word of m (if neg = 0) or -m (if neg = 1); + * but if neg = 0 and cc = 1, then ym = 0 and it forces mw to 0. + */ + xm = -neg >> 1; /* 0x7FFFFFFF when neg = 1 (complements the words of m), 0 otherwise */ + ym = -(neg | (1 - cc)); + cc = neg; + for (u = 0; u < len; u ++) { + uint32_t aw, mw; + + aw = a[u]; + mw = (m[u] ^ xm) & ym; + aw = aw - mw - cc; + a[u] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Replace a with (a*xa+b*xb)/(2^31) mod m, and b with + * (a*ya+b*yb)/(2^31) mod m. Modulus m must be odd; m0i = -1/m[0] mod 2^31. The arrays must hold at least one word: a[0], b[0] and the top words are accessed unconditionally. + */ +static void +zint_co_reduce_mod(uint32_t *a, uint32_t *b, const uint32_t *m, size_t len, + uint32_t m0i, int64_t xa, int64_t xb, int64_t ya, int64_t yb) { + size_t u; + int64_t cca, ccb; + uint32_t fa, fb; + + /* + * These are actually four combined Montgomery multiplications.
+ */ + cca = 0; + ccb = 0; + fa = ((a[0] * (uint32_t)xa + b[0] * (uint32_t)xb) * m0i) & 0x7FFFFFFF; + fb = ((a[0] * (uint32_t)ya + b[0] * (uint32_t)yb) * m0i) & 0x7FFFFFFF; + for (u = 0; u < len; u ++) { + uint32_t wa, wb; + uint64_t za, zb; + + wa = a[u]; + wb = b[u]; + za = wa * (uint64_t)xa + wb * (uint64_t)xb + + m[u] * (uint64_t)fa + (uint64_t)cca; + zb = wa * (uint64_t)ya + wb * (uint64_t)yb + + m[u] * (uint64_t)fb + (uint64_t)ccb; + if (u > 0) { /* results are stored one word lower: this is the division by 2^31 */ + a[u - 1] = (uint32_t)za & 0x7FFFFFFF; + b[u - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + cca = *(int64_t *)&za >> 31; /* NOTE(review): relies on right shift of a negative int64_t being arithmetic, which is implementation-defined in C */ + ccb = *(int64_t *)&zb >> 31; + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + /* + * At this point: + * -m <= a < 2*m + * -m <= b < 2*m + * (this is a case of Montgomery reduction) + * The top words of 'a' and 'b' may have a 32nd bit set. + * We want to add or subtract the modulus, as required. + */ + zint_finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63)); + zint_finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63)); +} + +/* + * Compute a GCD between two positive big integers x and y. The two + * integers must be odd. Returned value is 1 if the GCD is 1, 0 + * otherwise. When 1 is returned, arrays u and v are filled with values + * such that: + * 0 <= u <= y + * 0 <= v <= x + * x*u - y*v = 1 + * x[] and y[] are unmodified. Both input values must have the same + * encoded length. Temporary array must be large enough to accommodate 4 + * extra values of that length. Arrays u, v and tmp may not overlap with + * each other, or with either x or y. 0 is also returned when len is 0, or when x or y is even. + */ +static int +zint_bezout(uint32_t *u, uint32_t *v, + const uint32_t *x, const uint32_t *y, + size_t len, uint32_t *tmp) { + /* + * Algorithm is an extended binary GCD.
We maintain 6 values + * a, b, u0, u1, v0 and v1 with the following invariants: + * + * a = x*u0 - y*v0 + * b = x*u1 - y*v1 + * 0 <= a <= x + * 0 <= b <= y + * 0 <= u0 < y + * 0 <= v0 < x + * 0 <= u1 <= y + * 0 <= v1 < x + * + * Initial values are: + * + * a = x u0 = 1 v0 = 0 + * b = y u1 = y v1 = x-1 + * + * Each iteration reduces either a or b, and maintains the + * invariants. Algorithm stops when a = b, at which point their + * common value is GCD(a,b) and (u0,v0) (or (u1,v1)) contains + * the values (u,v) we want to return. + * + * The formal definition of the algorithm is a sequence of steps: + * + * - If a is even, then: + * a <- a/2 + * u0 <- u0/2 mod y + * v0 <- v0/2 mod x + * + * - Otherwise, if b is even, then: + * b <- b/2 + * u1 <- u1/2 mod y + * v1 <- v1/2 mod x + * + * - Otherwise, if a > b, then: + * a <- (a-b)/2 + * u0 <- (u0-u1)/2 mod y + * v0 <- (v0-v1)/2 mod x + * + * - Otherwise: + * b <- (b-a)/2 + * u1 <- (u1-u0)/2 mod y + * v1 <- (v1-v0)/2 mod x + * + * We can show that the operations above preserve the invariants: + * + * - If a is even, then u0 and v0 are either both even or both + * odd (since a = x*u0 - y*v0, and x and y are both odd). + * If u0 and v0 are both even, then (u0,v0) <- (u0/2,v0/2). + * Otherwise, (u0,v0) <- ((u0+y)/2,(v0+x)/2). Either way, + * the a = x*u0 - y*v0 invariant is preserved. + * + * - The same holds for the case where b is even. + * + * - If a and b are odd, and a > b, then: + * + * a-b = x*(u0-u1) - y*(v0-v1) + * + * In that situation, if u0 < u1, then x*(u0-u1) < 0, but + * a-b > 0; therefore, it must be that v0 < v1, and the + * first part of the update is: (u0,v0) <- (u0-u1+y,v0-v1+x), + * which preserves the invariants. Otherwise, if u0 > u1, + * then u0-u1 >= 1, thus x*(u0-u1) >= x. But a <= x and + * b >= 0, hence a-b <= x. It follows that, in that case, + * v0-v1 >= 0. The first part of the update is then: + * (u0,v0) <- (u0-u1,v0-v1), which again preserves the + * invariants.
+ * + * Either way, once the subtraction is done, the new value of + * a, which is the difference of two odd values, is even, + * and the remainder of this step is a subcase of the + * first algorithm case (i.e. when a is even). + * + * - If a and b are odd, and b > a, then a similar + * argument holds. + * + * The values a and b start at x and y, respectively. Since x + * and y are odd, their GCD is odd, and it is easily seen that + * all steps conserve the GCD (GCD(a-b,b) = GCD(a, b); + * GCD(a/2,b) = GCD(a,b) if GCD(a,b) is odd). Moreover, either a + * or b is reduced by at least one bit at each iteration, so + * the algorithm necessarily converges on the case a = b, at + * which point the common value is the GCD. + * + * In the algorithm expressed above, when a = b, the fourth case + * applies, and sets b = 0. Since a contains the GCD of x and y, + * which are both odd, a must be odd, and subsequent iterations + * (if any) will simply divide b by 2 repeatedly, which has no + * consequence. Thus, the algorithm can run for more iterations + * than necessary; the final GCD will be in a, and the (u,v) + * coefficients will be (u0,v0). + * + * + * The presentation above is bit-by-bit. It can be sped up by + * noticing that all decisions are taken based on the low bits + * and high bits of a and b. We can extract the two top words + * and low word of each of a and b, and compute reduction + * parameters pa, pb, qa and qb such that the new values for + * a and b are: + * a' = (a*pa + b*pb) / (2^31) + * b' = (a*qa + b*qb) / (2^31) + * the two divisions being exact. The coefficients are obtained + * just from the extracted words, and may be slightly off, requiring + * an optional correction: if a' < 0, then we replace pa with -pa + * and pb with -pb. Each such step will reduce the total length + * (sum of lengths of a and b) by at least 30 bits at each + * iteration.
+ */ + uint32_t *u0, *u1, *v0, *v1, *a, *b; + uint32_t x0i, y0i; + uint32_t num, rc; + size_t j; + + if (len == 0) { + return 0; + } + + /* + * u0 and v0 are the u and v result buffers; the four other + * values (u1, v1, a and b) are taken from tmp[]. + */ + u0 = u; + v0 = v; + u1 = tmp; + v1 = u1 + len; + a = v1 + len; + b = a + len; /* tmp[] is carved into four consecutive len-word areas: u1, v1, a, b */ + + /* + * We'll need the Montgomery reduction coefficients. + */ + x0i = modp_ninv31(x[0]); + y0i = modp_ninv31(y[0]); + + /* + * Initialize a, b, u0, u1, v0 and v1. + * a = x u0 = 1 v0 = 0 + * b = y u1 = y v1 = x-1 + * Note that x is odd, so computing x-1 is easy. + */ + memcpy(a, x, len * sizeof * x); + memcpy(b, y, len * sizeof * y); + u0[0] = 1; + memset(u0 + 1, 0, (len - 1) * sizeof * u0); + memset(v0, 0, len * sizeof * v0); + memcpy(u1, y, len * sizeof * u1); + memcpy(v1, x, len * sizeof * v1); + v1[0] --; /* v1 = x-1: only the low word changes */ + + /* + * Each input operand may be as large as 31*len bits, and we + * reduce the total length by at least 30 bits at each iteration. + */ + for (num = 62 * (uint32_t)len + 30; num >= 30; num -= 30) { /* the iteration count depends only on len, not on the operand values */ + uint32_t c0, c1; + uint32_t a0, a1, b0, b1; + uint64_t a_hi, b_hi; + uint32_t a_lo, b_lo; + int64_t pa, pb, qa, qb; + int i; + uint32_t r; + + /* + * Extract the top words of a and b. If j is the highest + * index >= 1 such that a[j] != 0 or b[j] != 0, then we + * want (a[j] << 31) + a[j-1] and (b[j] << 31) + b[j-1]. + * If a and b are down to one word each, then we use + * a[0] and b[0]. + */ + c0 = (uint32_t) -1; + c1 = (uint32_t) -1; + a0 = 0; + a1 = 0; + b0 = 0; + b1 = 0; + j = len; + while (j -- > 0) { + uint32_t aw, bw; + + aw = a[j]; + bw = b[j]; + a0 ^= (a0 ^ aw) & c0; + a1 ^= (a1 ^ aw) & c1; + b0 ^= (b0 ^ bw) & c0; + b1 ^= (b1 ^ bw) & c1; + c1 = c0; + c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1; /* c0 is cleared once a non-zero word of a or b has been seen */ + } + + /* + * If c1 = 0, then we grabbed two words for a and b. + * If c1 != 0 but c0 = 0, then we grabbed one word.
It + * is not possible that c1 != 0 and c0 != 0, because that + * would mean that both integers are zero. + */ + a1 |= a0 & c1; + a0 &= ~c1; + b1 |= b0 & c1; + b0 &= ~c1; + a_hi = ((uint64_t)a0 << 31) + a1; + b_hi = ((uint64_t)b0 << 31) + b1; + a_lo = a[0]; + b_lo = b[0]; + + /* + * Compute reduction factors: + * + * a' = a*pa + b*pb + * b' = a*qa + b*qb + * + * such that a' and b' are both multiples of 2^31, but are + * only marginally larger than a and b. + */ + pa = 1; + pb = 0; + qa = 0; + qb = 1; + for (i = 0; i < 31; i ++) { + /* + * At each iteration: + * + * a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi + * b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi + * a <- a/2 if: a is even + * b <- b/2 if: a is odd, b is even + * + * We multiply a_lo and b_lo by 2 at each + * iteration, thus a division by 2 really is a + * non-multiplication by 2. + */ + uint32_t rt, oa, ob, cAB, cBA, cA; + uint64_t rz; + + /* + * rt = 1 if a_hi > b_hi, 0 otherwise. + */ + rz = b_hi - a_hi; + rt = (uint32_t)((rz ^ ((a_hi ^ b_hi) + & (a_hi ^ rz))) >> 63); + + /* + * cAB = 1 if b must be subtracted from a + * cBA = 1 if a must be subtracted from b + * cA = 1 if a must be divided by 2 + * + * Rules: + * + * cAB and cBA cannot both be 1. + * If a is not divided by 2, b is. + */ + oa = (a_lo >> i) & 1; /* bit i is the current lowest bit, since the low words are doubled instead of halved */ + ob = (b_lo >> i) & 1; + cAB = oa & ob & rt; + cBA = oa & ob & ~rt; /* oa and ob are 0 or 1, so only bit 0 of ~rt matters */ + cA = cAB | (oa ^ 1); + + /* + * Conditional subtractions. + */ + a_lo -= b_lo & -cAB; + a_hi -= b_hi & -(uint64_t)cAB; + pa -= qa & -(int64_t)cAB; + pb -= qb & -(int64_t)cAB; + b_lo -= a_lo & -cBA; + b_hi -= a_hi & -(uint64_t)cBA; + qa -= pa & -(int64_t)cBA; + qb -= pb & -(int64_t)cBA; + + /* + * Shifting.
+ */ + a_lo += a_lo & (cA - 1); + pa += pa & ((int64_t)cA - 1); + pb += pb & ((int64_t)cA - 1); + a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA; + b_lo += b_lo & -cA; + qa += qa & -(int64_t)cA; + qb += qb & -(int64_t)cA; + b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1); + } + + /* + * Apply the computed parameters to our values. We + * may have to correct pa and pb (or qa and qb) depending on the + * returned value of zint_co_reduce() (when a and/or b + * had to be negated). + */ + r = zint_co_reduce(a, b, len, pa, pb, qa, qb); + pa -= (pa + pa) & -(int64_t)(r & 1); /* x - 2x = -x: negates pa when bit 0 of r is set */ + pb -= (pb + pb) & -(int64_t)(r & 1); + qa -= (qa + qa) & -(int64_t)(r >> 1); + qb -= (qb + qb) & -(int64_t)(r >> 1); + zint_co_reduce_mod(u0, u1, y, len, y0i, pa, pb, qa, qb); + zint_co_reduce_mod(v0, v1, x, len, x0i, pa, pb, qa, qb); + } + + /* + * At that point, array a[] should contain the GCD, and the + * results (u,v) should already be set. We check that the GCD + * is indeed 1. We also check that the two operands x and y + * are odd. + */ + rc = a[0] ^ 1; + for (j = 1; j < len; j ++) { + rc |= a[j]; + } + return (int)((1 - ((rc | -rc) >> 31)) & x[0] & y[0]); /* first factor is 1 only when rc = 0; the AND keeps the low bits of x[0] and y[0] */ +} + +/* + * Add k*y*2^sc to x. The result is assumed to fit in the array of + * size xlen (truncation is applied if necessary). + * Scale factor 'sc' is provided as sch and scl, such that: + * sch = sc / 31 + * scl = sc % 31 + * xlen MUST NOT be lower than ylen. + * + * x[] and y[] are both signed integers, using two's complement for + * negative values. + */ +static void +zint_add_scaled_mul_small(uint32_t *x, size_t xlen, + const uint32_t *y, size_t ylen, int32_t k, + uint32_t sch, uint32_t scl) { + size_t u; + uint32_t ysign, tw; + int32_t cc; + + if (ylen == 0) { + return; + } + + ysign = -(y[ylen - 1] >> 30) >> 1; /* 0x7FFFFFFF when bit 30 of the top word of y is set (negative y), 0 otherwise */ + tw = 0; + cc = 0; + for (u = sch; u < xlen; u ++) { + size_t v; + uint32_t wy, wys, ccu; + uint64_t z; + + /* + * Get the next word of y (scaled).
+ */ + v = u - sch; + if (v < ylen) { + wy = y[v]; + } else { + wy = ysign; /* past the top of y: use the sign-extension word */ + } + wys = ((wy << scl) & 0x7FFFFFFF) | tw; + tw = wy >> (31 - scl); /* bits shifted out of this word are carried into the next one */ + + /* + * The expression below does not overflow. + */ + z = (uint64_t)((int64_t)wys * (int64_t)k + (int64_t)x[u] + cc); + x[u] = (uint32_t)z & 0x7FFFFFFF; + + /* + * Right-shifting the signed value z would yield + * implementation-defined results (arithmetic shift is + * not guaranteed). However, we can cast to unsigned, + * and get the next carry as an unsigned word. We can + * then convert it back to signed by using the guaranteed + * fact that 'int32_t' uses two's complement with no + * trap representation or padding bit, and with a layout + * compatible with that of 'uint32_t'. + */ + ccu = (uint32_t)(z >> 31); + cc = *(int32_t *)&ccu; + } +} + +/* + * Subtract y*2^sc from x. The result is assumed to fit in the array of + * size xlen (truncation is applied if necessary). + * Scale factor 'sc' is provided as sch and scl, such that: + * sch = sc / 31 + * scl = sc % 31 + * xlen MUST NOT be lower than ylen. + * + * x[] and y[] are both signed integers, using two's complement for + * negative values. + */ +static void +zint_sub_scaled(uint32_t *x, size_t xlen, + const uint32_t *y, size_t ylen, uint32_t sch, uint32_t scl) { + size_t u; + uint32_t ysign, tw; + uint32_t cc; + + if (ylen == 0) { + return; + } + + ysign = -(y[ylen - 1] >> 30) >> 1; /* 0x7FFFFFFF when bit 30 of the top word of y is set (negative y), 0 otherwise */ + tw = 0; + cc = 0; + for (u = sch; u < xlen; u ++) { + size_t v; + uint32_t w, wy, wys; + + /* + * Get the next word of y (scaled). + */ + v = u - sch; + if (v < ylen) { + wy = y[v]; + } else { + wy = ysign; /* past the top of y: use the sign-extension word */ + } + wys = ((wy << scl) & 0x7FFFFFFF) | tw; + tw = wy >> (31 - scl); + + w = x[u] - wys - cc; + x[u] = w & 0x7FFFFFFF; + cc = w >> 31; /* borrow for the next word */ + } +} + +/* + * Convert a one-word signed big integer into a signed value.
+ */ +static inline int32_t +zint_one_to_plain(const uint32_t *x) { + uint32_t w; + + w = x[0]; + w |= (w & 0x40000000) << 1; /* copy bit 30 (sign of the 31-bit value) into bit 31 */ + return *(int32_t *)&w; +} + +/* ==================================================================== */ + +/* + * Convert a polynomial to floating-point values. + * + * Each coefficient has length flen words, and starts fstride words after + * the previous. + * + * IEEE-754 binary64 values can represent values in a finite range, + * roughly 2^(-1023) to 2^(+1023); thus, if coefficients are too large, + * they should be "trimmed" by pointing not to the lowest word of each, + * but to a higher one. + */ +static void +poly_big_to_fp(fpr *d, const uint32_t *f, size_t flen, size_t fstride, + unsigned logn) { + size_t n, u; + + n = MKN(logn); + if (flen == 0) { /* zero-length coefficients: the whole output is set to fpr_zero */ + for (u = 0; u < n; u ++) { + d[u] = fpr_zero; + } + return; + } + for (u = 0; u < n; u ++, f += fstride) { + size_t v; + uint32_t neg, cc, xm; + fpr x, fsc; + + /* + * Get sign of the integer; if it is negative, then we + * will load its absolute value instead, and negate the + * result. + */ + neg = -(f[flen - 1] >> 30); + xm = neg >> 1; + cc = neg & 1; + x = fpr_zero; + fsc = fpr_one; + for (v = 0; v < flen; v ++, fsc = fpr_mul(fsc, fpr_ptwo31)) { + uint32_t w; + + w = (f[v] ^ xm) + cc; + cc = w >> 31; + w &= 0x7FFFFFFF; + w -= (w << 1) & neg; /* for a negative input this replaces the absolute-value word w with -w */ + x = fpr_add(x, fpr_mul(fpr_of(*(int32_t *)&w), fsc)); + } + d[u] = x; + } +} + +/* + * Convert a polynomial to small integers. Source values are supposed + * to be one-word integers, signed over 31 bits. Returned value is 0 + * if any of the coefficients exceeds the provided limit (in absolute + * value), or 1 on success. + * + * This is not constant-time; this is not a problem here, because on + * any failure, the NTRU-solving process will be deemed to have failed + * and the (f,g) polynomials will be discarded.
+ */ +static int +poly_big_to_small(int8_t *d, const uint32_t *s, int lim, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + int32_t z; + + z = zint_one_to_plain(s + u); + if (z < -lim || z > lim) { /* early exit: d[] may be left partially written */ + return 0; + } + d[u] = (int8_t)z; /* NOTE(review): lossless only if lim fits in the int8_t range; confirm against callers */ + } + return 1; +} + +/* + * Subtract k*f from F, where F, f and k are polynomials modulo X^N+1. + * Coefficients of polynomial k are small integers (signed values in the + * -2^31..2^31 range) scaled by 2^sc. Value sc is provided as sch = sc / 31 + * and scl = sc % 31. + * + * This function implements the basic quadratic multiplication algorithm, + * which is efficient in space (no extra buffer needed) but slow at + * high degree. + */ +static void +poly_sub_scaled(uint32_t *F, size_t Flen, size_t Fstride, + const uint32_t *f, size_t flen, size_t fstride, + const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + int32_t kf; + size_t v; + uint32_t *x; + const uint32_t *y; + + kf = -k[u]; /* negated so that the add-scaled helper subtracts; NOTE(review): overflows if k[u] is INT32_MIN, presumably excluded by callers */ + x = F + u * Fstride; + y = f; + for (v = 0; v < n; v ++) { + zint_add_scaled_mul_small( + x, Flen, y, flen, kf, sch, scl); + if (u + v == n - 1) { /* wrap-around modulo X^N+1: restart at F[0] with the sign of kf flipped */ + x = F; + kf = -kf; + } else { + x += Fstride; + } + y += fstride; + } + } +} + +/* + * Subtract k*f from F. Coefficients of polynomial k are small integers + * (signed values in the -2^31..2^31 range) scaled by 2^sc. This function + * assumes that the degree is large, and integers relatively small. + * The value sc is provided as sch = sc / 31 and scl = sc % 31.
+ */ +static void +poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride, + const uint32_t *f, size_t flen, size_t fstride, + const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn, + uint32_t *tmp) { + uint32_t *gm, *igm, *fk, *t1, *x; + const uint32_t *y; + size_t n, u, tlen; + const small_prime *primes; + + n = MKN(logn); + tlen = flen + 1; /* k*f is computed over flen+1 words, i.e. modulo flen+1 small primes */ + gm = tmp; + igm = gm + MKN(logn); + fk = igm + MKN(logn); + t1 = fk + n * tlen; /* tmp[] layout: gm, igm (MKN(logn) words each), fk (n*tlen words), then t1 */ + + primes = PRIMES; + + /* + * Compute k*f in fk[], in RNS notation. + */ + for (u = 0; u < tlen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)flen, p, p0i, R2); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + for (v = 0; v < n; v ++) { + t1[v] = modp_set(k[v], p); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, y = f, x = fk + u; + v < n; v ++, y += fstride, x += tlen) { + *x = zint_mod_small_signed(y, flen, p, p0i, R2, Rx); /* residue of each coefficient of f modulo p, stored in column u of fk */ + } + modp_NTT2_ext(fk + u, tlen, gm, logn, p, p0i); + for (v = 0, x = fk + u; v < n; v ++, x += tlen) { + *x = modp_montymul( + modp_montymul(t1[v], *x, p, p0i), R2, p, p0i); + } + modp_iNTT2_ext(fk + u, tlen, igm, logn, p, p0i); + } + + /* + * Rebuild k*f. + */ + zint_rebuild_CRT(fk, tlen, tlen, n, primes, 1, t1); /* t1 is reused as the scratch area for the product of primes */ + + /* + * Subtract k*f, scaled, from F. + */ + for (u = 0, x = F, y = fk; u < n; u ++, x += Fstride, y += tlen) { + zint_sub_scaled(x, Flen, y, tlen, sch, scl); + } +} + +/* ==================================================================== */ + + +#define RNG_CONTEXT inner_shake256_context + +/* + * Get a random 8-byte integer from a SHAKE-based RNG. This function + * ensures consistent interpretation of the SHAKE output so that + * the same values will be obtained over different platforms, in case + * a known seed is used. + */ +static inline uint64_t +get_rng_u64(inner_shake256_context *rng) { + /* + * We enforce little-endian representation.
+ */ + + uint8_t tmp[8]; + + inner_shake256_extract(rng, tmp, sizeof tmp); + return (uint64_t)tmp[0] + | ((uint64_t)tmp[1] << 8) + | ((uint64_t)tmp[2] << 16) + | ((uint64_t)tmp[3] << 24) + | ((uint64_t)tmp[4] << 32) + | ((uint64_t)tmp[5] << 40) + | ((uint64_t)tmp[6] << 48) + | ((uint64_t)tmp[7] << 56); +} + +/* + * Table below incarnates a discrete Gaussian distribution: + * D(x) = exp(-(x^2)/(2*sigma^2)) + * where sigma = 1.17*sqrt(q/(2*N)), q = 12289, and N = 1024. + * Element 0 of the table is P(x = 0). + * For k > 0, element k is P(x >= k+1 | x > 0). + * Probabilities are scaled up by 2^63. The last entry is 0, so a 63-bit draw always stops there at the latest. + */ +static const uint64_t gauss_1024_12289[] = { + 1283868770400643928u, 6416574995475331444u, 4078260278032692663u, + 2353523259288686585u, 1227179971273316331u, 575931623374121527u, + 242543240509105209u, 91437049221049666u, 30799446349977173u, + 9255276791179340u, 2478152334826140u, 590642893610164u, + 125206034929641u, 23590435911403u, 3948334035941u, + 586753615614u, 77391054539u, 9056793210u, + 940121950u, 86539696u, 7062824u, + 510971u, 32764u, 1862u, + 94u, 4u, 0u +}; + +/* + * Generate a random value with a Gaussian distribution centered on 0. + * The RNG must be ready for extraction (already flipped). + * + * Distribution has standard deviation 1.17*sqrt(q/(2*N)). The + * precomputed table is for N = 1024. Since the sum of two independent + * values of standard deviation sigma has standard deviation + * sigma*sqrt(2), then we can just generate more values and add them + * together for lower dimensions. + */ +static int +mkgauss(RNG_CONTEXT *rng, unsigned logn) { + unsigned u, g; + int val; + + g = 1U << (10 - logn); /* number of N = 1024 samples summed; logn must not exceed 10 for this shift to be valid */ + val = 0; + for (u = 0; u < g; u ++) { + /* + * Each iteration generates one value with the + * Gaussian distribution for N = 1024. + * + * We use two random 64-bit values. First value + * decides on whether the generated value is 0, and, + * if not, the sign of the value. Second random 64-bit + * word is used to generate the non-zero value.
+ * + * For constant-time code we have to read the complete + * table. This has negligible cost, compared with the + * remainder of the keygen process (solving the NTRU + * equation). + */ + uint64_t r; + uint32_t f, v, k, neg; + + /* + * First value: + * - flag 'neg' is randomly selected to be 0 or 1. + * - flag 'f' is set to 1 if the generated value is zero, + * or set to 0 otherwise. + */ + r = get_rng_u64(rng); + neg = (uint32_t)(r >> 63); + r &= ~((uint64_t)1 << 63); /* keep the low 63 bits; the top bit was consumed as the sign */ + f = (uint32_t)((r - gauss_1024_12289[0]) >> 63); + + /* + * We produce a new random 63-bit integer r, and go over + * the array, starting at index 1. We store in v the + * index of the first array element which is not greater + * than r, unless the flag f was already 1. + */ + v = 0; + r = get_rng_u64(rng); + r &= ~((uint64_t)1 << 63); + for (k = 1; k < (uint32_t)((sizeof gauss_1024_12289) + / (sizeof gauss_1024_12289[0])); k ++) { + uint32_t t; + + t = (uint32_t)((r - gauss_1024_12289[k]) >> 63) ^ 1; /* t = 1 when r is not lower than table entry k */ + v |= k & -(t & (f ^ 1)); + f |= t; /* once set, f blocks any later update of v */ + } + + /* + * We apply the sign ('neg' flag). If the value is zero, + * the sign has no effect. + */ + v = (v ^ -neg) + neg; /* two's complement negation of v when neg = 1 */ + + /* + * Generated value is added to val. + */ + val += *(int32_t *)&v; + } + return val; +} + +/* + * The MAX_BL_SMALL[] and MAX_BL_LARGE[] contain the lengths, in 31-bit + * words, of intermediate values in the computation: + * + * MAX_BL_SMALL[depth]: length for the input f and g at that depth + * MAX_BL_LARGE[depth]: length for the unreduced F and G at that depth + * + * Rules: + * + * - Within an array, values grow. + * + * - The 'SMALL' array must have an entry for maximum depth, corresponding + * to the size of values used in the binary GCD. There is no such value + * for the 'LARGE' array (the binary GCD yields already reduced + * coefficients). + * + * - MAX_BL_LARGE[depth] >= MAX_BL_SMALL[depth + 1]. + * + * - Values must be large enough to handle the common cases, with some + * margins.
+ * + * - Values must not be "too large" either because we will convert some + * integers into floating-point values by considering the top 10 words, + * i.e. 310 bits; hence, for values of length more than 10 words, we + * should take care to have the length centered on the expected size. + * + * The following average lengths, in bits, have been measured on thousands + * of random keys (fg = max length of the absolute value of coefficients + * of f and g at that depth; FG = idem for the unreduced F and G; for the + * maximum depth, F and G are the output of binary GCD, multiplied by q; + * for each value, the average and standard deviation are provided). + * + * Binary case: + * depth: 10 fg: 6307.52 (24.48) FG: 6319.66 (24.51) + * depth: 9 fg: 3138.35 (12.25) FG: 9403.29 (27.55) + * depth: 8 fg: 1576.87 ( 7.49) FG: 4703.30 (14.77) + * depth: 7 fg: 794.17 ( 4.98) FG: 2361.84 ( 9.31) + * depth: 6 fg: 400.67 ( 3.10) FG: 1188.68 ( 6.04) + * depth: 5 fg: 202.22 ( 1.87) FG: 599.81 ( 3.87) + * depth: 4 fg: 101.62 ( 1.02) FG: 303.49 ( 2.38) + * depth: 3 fg: 50.37 ( 0.53) FG: 153.65 ( 1.39) + * depth: 2 fg: 24.07 ( 0.25) FG: 78.20 ( 0.73) + * depth: 1 fg: 10.99 ( 0.08) FG: 39.82 ( 0.41) + * depth: 0 fg: 4.00 ( 0.00) FG: 19.61 ( 0.49) + * + * Integers are actually represented either in binary notation over + * 31-bit words (signed, using two's complement), or in RNS, modulo + * many small primes. These small primes are close to, but slightly + * lower than, 2^31. Use of RNS loses less than two bits, even for + * the largest values. + * + * IMPORTANT: if these values are modified, then the temporary buffer + * sizes (FALCON_KEYGEN_TEMP_*, in inner.h) must be recomputed + * accordingly. 
+ */ + +static const size_t MAX_BL_SMALL[] = { + 1, 1, 2, 2, 4, 7, 14, 27, 53, 106, 209 +}; + +static const size_t MAX_BL_LARGE[] = { + 2, 2, 5, 7, 12, 21, 40, 78, 157, 308 +}; + +/* + * Average and standard deviation for the maximum size (in bits) of + * coefficients of (f,g), depending on depth. These values are used + * to compute bounds for Babai's reduction. The table is indexed by + * depth (entry 0 is depth 0); solve_NTRU_intermediate() reads it as + * BITLENGTH[depth].avg and BITLENGTH[depth].std. + */ +static const struct { + int avg; + int std; +} BITLENGTH[] = { + { 4, 0 }, + { 11, 1 }, + { 24, 1 }, + { 50, 1 }, + { 102, 1 }, + { 202, 2 }, + { 401, 4 }, + { 794, 5 }, + { 1577, 8 }, + { 3138, 13 }, + { 6308, 25 } +}; + +/* + * Minimal recursion depth at which we rebuild intermediate values + * when reconstructing f and g. + * + * solve_NTRU_intermediate() compares its 'depth' against this value: + * for depth <= DEPTH_INT_FG it subtracts k*f and k*g with + * poly_sub_scaled_ntt(), and with poly_sub_scaled() otherwise. + */ +#define DEPTH_INT_FG 4 + +/* + * Compute the squared norm of a short vector, i.e. the sum of the + * squares of the MKN(logn) coefficients of f. Returned value is + * saturated to 2^32-1 if it is not lower than 2^31: every partial sum + * is OR-ed into 'ng', so the result is also saturated when an + * intermediate sum had its top bit set. + */ +static uint32_t +poly_small_sqnorm(const int8_t *f, unsigned logn) { + size_t n, u; + uint32_t s, ng; + + n = MKN(logn); + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = f[u]; + s += (uint32_t)(z * z); + ng |= s; + } + return s | -(ng >> 31); +} + +/* + * Align (upwards) the provided 'data' pointer with regard to 'base' + * so that the offset is a multiple of the size of 'fpr'. If the offset + * already is such a multiple, the returned pointer equals 'data'. + * The offset is computed as (size_t)(data - base), so 'data' is + * expected not to precede 'base'. + */ +static fpr * +align_fpr(void *base, void *data) { + uint8_t *cb, *cd; + size_t k, km; + + cb = base; + cd = data; + k = (size_t)(cd - cb); + km = k % sizeof(fpr); + if (km) { + k += (sizeof(fpr)) - km; + } + return (fpr *)(cb + k); +} + +/* + * Align (upwards) the provided 'data' pointer with regard to 'base' + * so that the offset is a multiple of the size of 'uint32_t'. If the + * offset already is such a multiple, the returned pointer equals + * 'data'. The offset is computed as (size_t)(data - base), so 'data' + * is expected not to precede 'base'. + */ +static uint32_t * +align_u32(void *base, void *data) { + uint8_t *cb, *cd; + size_t k, km; + + cb = base; + cd = data; + k = (size_t)(cd - cb); + km = k % sizeof(uint32_t); + if (km) { + k += (sizeof(uint32_t)) - km; + } + return (uint32_t *)(cb + k); +} + +/* + * Convert a small vector to floating point.
+ */ +static void +poly_small_to_fp(fpr *x, const int8_t *f, unsigned logn) { + size_t n, u; + + n = MKN(logn); /* number of coefficients to convert */ + for (u = 0; u < n; u ++) { + x[u] = fpr_of(f[u]); + } +} + +/* + * Input: f,g of degree N = 2^logn; 'depth' is used only to get their + * individual length (slen = MAX_BL_SMALL[depth] words per coefficient). + * + * Output: f',g' of degree N/2, with the length for 'depth+1' + * (tlen = MAX_BL_SMALL[depth + 1] words per coefficient). + * + * Values are in RNS; input and/or output may also be in NTT: + * 'in_ntt' is non-zero when the input is in NTT representation, and + * 'out_ntt' is non-zero when the output must be left in NTT + * representation. + * + * On entry, data[] holds f then g (slen words for each of the N + * coefficients). On exit, data[] starts with f' then g' (tlen words + * for each of the N/2 coefficients); the words that follow are used + * as working space (the relocated f and g, then two N-word NTT tables + * and one N-word temporary). + */ +static void +make_fg_step(uint32_t *data, unsigned logn, unsigned depth, + int in_ntt, int out_ntt) { + size_t n, hn, u; + size_t slen, tlen; + uint32_t *fd, *gd, *fs, *gs, *gm, *igm, *t1; + const small_prime *primes; + + n = (size_t)1 << logn; + hn = n >> 1; + slen = MAX_BL_SMALL[depth]; + tlen = MAX_BL_SMALL[depth + 1]; + primes = PRIMES; + + /* + * Prepare room for the result: the input f and g are moved + * after the area reserved for f' and g' (memmove() is used + * because source and destination may overlap). + */ + fd = data; + gd = fd + hn * tlen; + fs = gd + hn * tlen; + gs = fs + n * slen; + gm = gs + n * slen; + igm = gm + n; + t1 = igm + n; + memmove(fs, data, 2 * n * slen * sizeof * data); + + /* + * First slen words: we use the input values directly, and apply + * inverse NTT as we go (on the relocated f and g, and only when + * in_ntt is set; otherwise it is the per-prime copy in t1 that + * gets converted to NTT before the products are computed).
+ */ + for (u = 0; u < slen; u ++) { + uint32_t p, p0i, R2; + size_t v; + uint32_t *x; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + for (v = 0, x = fs + u; v < n; v ++, x += slen) { + t1[v] = *x; + } + if (!in_ntt) { + modp_NTT2(t1, gm, logn, p, p0i); + } + for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + if (in_ntt) { + modp_iNTT2_ext(fs + u, slen, igm, logn, p, p0i); + } + + for (v = 0, x = gs + u; v < n; v ++, x += slen) { + t1[v] = *x; + } + if (!in_ntt) { + modp_NTT2(t1, gm, logn, p, p0i); + } + for (v = 0, x = gd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + if (in_ntt) { + modp_iNTT2_ext(gs + u, slen, igm, logn, p, p0i); + } + + if (!out_ntt) { + modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i); + modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i); + } + } + + /* + * Since the fs and gs words have been de-NTTized (or never were + * in NTT representation, when in_ntt is zero), we can use the + * CRT to rebuild the values. + */ + zint_rebuild_CRT(fs, slen, slen, n, primes, 1, gm); + zint_rebuild_CRT(gs, slen, slen, n, primes, 1, gm); + + /* + * Remaining words: use modular reductions to extract the values.
+ */ + for (u = slen; u < tlen; u ++) { /* primes beyond the slen words carried by the input */ + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *x; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)slen, p, p0i, R2); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + for (v = 0, x = fs; v < n; v ++, x += slen) { + t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + for (v = 0, x = gs; v < n; v ++, x += slen) { + t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, x = gd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + + if (!out_ntt) { /* plain RNS output requested: undo the NTT on word u of the results */ + modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i); + modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i); + } + } +} + +/* + * Compute f and g at a specific depth, in RNS notation. + * + * Returned values are stored in the data[] array, at slen words per integer. + * + * Conditions: + * 0 <= depth <= logn + * + * Space use in data[]: enough room for any two successive values (f', g', + * f and g).
+ */ +static void +make_fg(uint32_t *data, const int8_t *f, const int8_t *g, + unsigned logn, unsigned depth, int out_ntt) { + size_t n, u; + uint32_t *ft, *gt, p0; + unsigned d; + const small_prime *primes; + + n = MKN(logn); + ft = data; + gt = ft + n; /* g is stored right after f in data[] */ + primes = PRIMES; + p0 = primes[0].p; + for (u = 0; u < n; u ++) { /* load f and g modulo the first small prime, one word per coefficient */ + ft[u] = modp_set(f[u], p0); + gt[u] = modp_set(g[u], p0); + } + + if (depth == 0 && out_ntt) { /* depth 0 with NTT output: convert in place, tables placed after g */ + uint32_t *gm, *igm; + uint32_t p, p0i; + + p = primes[0].p; + p0i = modp_ninv31(p); + gm = gt + n; + igm = gm + MKN(logn); + modp_mkgm2(gm, igm, logn, primes[0].g, p, p0i); + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + return; + } + + if (depth == 0) { /* depth 0 without NTT: the values loaded above are the result */ + return; + } + if (depth == 1) { /* single step: plain input, output format chosen by the caller */ + make_fg_step(data, logn, 0, 0, out_ntt); + return; + } + make_fg_step(data, logn, 0, 0, 1); /* first step: plain input, NTT output */ + for (d = 1; d + 1 < depth; d ++) { /* middle steps: NTT input and NTT output */ + make_fg_step(data, logn - d, d, 1, 1); + } + make_fg_step(data, logn - depth + 1, depth - 1, 1, out_ntt); /* last step: NTT input, output format chosen by the caller */ +} + +/* + * Solving the NTRU equation, deepest level: compute the resultants of + * f and g with X^N+1, and use binary GCD. The F and G values are + * returned in tmp[]. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_deepest(unsigned logn_top, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t len; + uint32_t *Fp, *Gp, *fp, *gp, *t1, q; + const small_prime *primes; + + len = MAX_BL_SMALL[logn_top]; + primes = PRIMES; + + Fp = tmp; + Gp = Fp + len; + fp = Gp + len; + gp = fp + len; + t1 = gp + len; /* tmp[] layout: Fp, Gp, fp, gp (len words each), then scratch space */ + + make_fg(fp, f, g, logn_top, logn_top, 0); /* depth = logn_top, output not in NTT, written from fp onwards */ + + /* + * We use the CRT to rebuild the resultants as big integers. + * There are two such big integers. The resultants are always + * nonnegative. + */ + zint_rebuild_CRT(fp, len, len, 2, primes, 0, t1); + + /* + * Apply the binary GCD. The zint_bezout() function works only + * if both inputs are odd.
+ * + * We can test on the result and return 0 because that would + * imply failure of the NTRU solving equation, and the (f,g) + * values will be abandoned in that case. + */ + if (!zint_bezout(Gp, Fp, fp, gp, len, t1)) { + return 0; + } + + /* + * Multiply the two values by the target value q. Values must + * fit in the destination arrays. + * We can again test on the returned words: a non-zero output + * of zint_mul_small() means that we exceeded our array + * capacity, and that implies failure and rejection of (f,g). + */ + q = 12289; + if (zint_mul_small(Fp, len, q) != 0 + || zint_mul_small(Gp, len, q) != 0) { + return 0; + } + + return 1; +} + +/* + * Solving the NTRU equation, intermediate level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * This function MAY be invoked for the top-level (in which case depth = 0). + * + * On success, the reduced F and G for this level are left at the start + * of tmp[] (F first, then G), at MAX_BL_SMALL[depth] words per + * coefficient. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_intermediate(unsigned logn_top, + const int8_t *f, const int8_t *g, unsigned depth, uint32_t *tmp) { + /* + * In this function, 'logn' is the log2 of the degree for + * this step. If N = 2^logn, then: + * - the F and G values already in tmp[] (from the deeper + * levels) have degree N/2; + * - this function should return F and G of degree N.
+ */ + unsigned logn; + size_t n, hn, slen, dlen, llen, rlen, FGlen, u; + uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1; + fpr *rt1, *rt2, *rt3, *rt4, *rt5; + int scale_fg, minbl_fg, maxbl_fg, maxbl_FG, scale_k; + uint32_t *x, *y; + int32_t *k; + const small_prime *primes; + + logn = logn_top - depth; + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * slen = size for our input f and g; also size of the reduced + * F and G we return (degree N) + * + * dlen = size of the F and G obtained from the deeper level + * (degree N/2) + * + * llen = size for intermediary F and G before reduction (degree N) + * + * (slen, dlen and llen are word counts per coefficient, read + * from MAX_BL_SMALL[] and MAX_BL_LARGE[].) + * + * We build our non-reduced F and G as two independent halves each, + * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1). + */ + slen = MAX_BL_SMALL[depth]; + dlen = MAX_BL_SMALL[depth + 1]; + llen = MAX_BL_LARGE[depth]; + primes = PRIMES; + + /* + * Fd and Gd are the F and G from the deeper level. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + + /* + * Compute the input f and g for this level. Note that we get f + * and g in RNS + NTT representation. + */ + ft = Gd + dlen * hn; + make_fg(ft, f, g, logn_top, depth, 1); + + /* + * Move the newly computed f and g to make room for our candidate + * F and G (unreduced). + */ + Ft = tmp; + Gt = Ft + n * llen; + t1 = Gt + n * llen; + memmove(t1, ft, 2 * n * slen * sizeof * ft); + ft = t1; + gt = ft + slen * n; + t1 = gt + slen * n; + + /* + * Move Fd and Gd _after_ f and g. + */ + memmove(t1, Fd, 2 * hn * dlen * sizeof * Fd); + Fd = t1; + Gd = Fd + hn * dlen; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt (only n/2 values in each).
+ */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * We do not need Fd and Gd after that point. + */ + + /* + * Compute our F and G modulo sufficiently many small primes + * (the first llen entries of the primes table, one per + * iteration). + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + size_t v; + + /* + * All computations are done modulo p. + */ + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * If we processed slen words, then f and g have been + * de-NTTized, and are in RNS; we can rebuild them. + */ + if (u == slen) { + zint_rebuild_CRT(ft, slen, slen, n, primes, 1, t1); + zint_rebuild_CRT(gt, slen, slen, n, primes, 1, t1); + } + + gm = t1; + igm = gm + n; + fx = igm + n; + gx = fx + n; + + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + if (u < slen) { /* word u of f and g is still in NTT form: copy it, then de-NTTize it in place */ + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + fx[v] = *x; + gx[v] = *y; + } + modp_iNTT2_ext(ft + u, slen, igm, logn, p, p0i); + modp_iNTT2_ext(gt + u, slen, igm, logn, p, p0i); + } else { /* beyond slen: reduce the rebuilt integers modulo p, then convert to NTT */ + uint32_t Rx; + + Rx = modp_Rx((unsigned)slen, p, p0i, R2); + for (v = 0, x = ft, y = gt; + v < n; v ++, x += slen, y += slen) { + fx[v] = zint_mod_small_signed(x, slen, + p, p0i, R2, Rx); + gx[v] = zint_mod_small_signed(y, slen, + p, p0i, R2, Rx); + } + modp_NTT2(fx, gm, logn, p, p0i); + modp_NTT2(gx, gm, logn, p, p0i); + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed in + * a previous step, and stored in Ft and Gt.
+ */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * General case: + * + * we divide degree by d = 2 or 3 + * f'(x^d) = N(f)(x^d) = f * adj(f) + * g'(x^d) = N(g)(x^d) = g * adj(g) + * f'*G' - g'*F' = q + * F = F'(x^d) * adj(g) + * G = G'(x^d) * adj(f) + * + * We compute things in the NTT. We group roots of phi + * such that all roots x in a group share the same x^d. + * If the roots in a group are x_1, x_2... x_d, then: + * + * N(f)(x_1^d) = f(x_1)*f(x_2)*...*f(x_d) + * + * Thus, we have: + * + * G(x_1) = f(x_2)*f(x_3)*...*f(x_d)*G'(x_1^d) + * G(x_2) = f(x_1)*f(x_3)*...*f(x_d)*G'(x_1^d) + * ... + * G(x_d) = f(x_1)*f(x_2)*...*f(x_{d-1})*G'(x_1^d) + * + * In all cases, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, in our chosen NTT representation, roots + * from the same group are consecutive in RAM. + * + * The loop below implements the case d = 2: for each v, + * the output at index 2v uses the input value at index + * 2v+1, and conversely. + */ + for (v = 0, x = Ft + u, y = Gt + u; v < hn; + v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + } + + /* + * Rebuild F and G with the CRT. + */ + zint_rebuild_CRT(Ft, llen, llen, n, primes, 1, t1); + zint_rebuild_CRT(Gt, llen, llen, n, primes, 1, t1); + + /* + * At that point, Ft, Gt, ft and gt are consecutive in RAM (in that + * order).
+ */ + + /* + * Apply Babai reduction to bring back F and G to size slen. + * + * We use the FFT to compute successive approximations of the + * reduction coefficient. We first isolate the top bits of + * the coefficients of f and g, and convert them to floating + * point; with the FFT, we compute adj(f), adj(g), and + * 1/(f*adj(f)+g*adj(g)). + * + * Then, we repeatedly apply the following: + * + * - Get the top bits of the coefficients of F and G into + * floating point, and use the FFT to compute: + * (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) + * + * - Convert back that value into normal representation, and + * round it to the nearest integers, yielding a polynomial k. + * Proper scaling is applied to f, g, F and G so that the + * coefficients fit on 32 bits (signed). + * + * - Subtract k*f from F and k*g from G. + * + * Under normal conditions, this process reduces the size of F + * and G by some bits at each iteration. For constant-time + * operation, we do not want to measure the actual length of + * F and G; instead, we do the following: + * + * - f and g are converted to floating-point, with some scaling + * if necessary to keep values in the representable range. + * + * - For each iteration, we _assume_ a maximum size for F and G, + * and use the values at that size. If we overreach, then + * we get zeros, which is harmless: the resulting coefficients + * of k will be 0 and the value won't be reduced. + * + * - We conservatively assume that F and G will be reduced by + * at least 25 bits at each iteration. + * + * Even when reaching the bottom of the reduction, the reduction + * coefficient will remain low. If it goes out-of-range, then + * something wrong occurred and the whole NTRU solving fails. + */ + + /* + * Memory layout: + * - We need to compute and keep adj(f), adj(g), and + * 1/(f*adj(f)+g*adj(g)) (sizes N, N and N/2 fp numbers, + * respectively). + * - At each iteration we need two extra fp buffers (N fp values each), + * and produce a k (N 32-bit words).
k will be shared with one + * of the fp buffers. + * - To compute k*f and k*g efficiently (with the NTT), we need + * some extra room; we reuse the space of the temporary buffers. + * + * Arrays of 'fpr' are obtained from the temporary array itself. + * We ensure that the base is at a properly aligned offset (the + * source array tmp[] is supposed to be already aligned). + */ + + rt3 = align_fpr(tmp, t1); + rt4 = rt3 + n; + rt5 = rt4 + n; + rt1 = rt5 + (n >> 1); /* rt5 holds n/2 values */ + k = (int32_t *)align_u32(tmp, rt1); /* k[] starts where rt1 starts */ + rt2 = align_fpr(tmp, k + n); + if (rt2 < (rt1 + n)) { /* keep rt2 clear of the n values of rt1 */ + rt2 = rt1 + n; + } + t1 = (uint32_t *)k + n; /* scratch area handed to poly_sub_scaled_ntt(), right after k[] */ + + /* + * Get f and g into rt3 and rt4 as floating-point approximations. + * + * We need to "scale down" the floating-point representation of + * coefficients when they are too big. We want to keep the value + * below 2^310 or so. Thus, when values are larger than 10 words, + * we consider only the top 10 words. Array lengths have been + * computed so that average maximum length will fall in the + * middle or the upper half of these top 10 words. + */ + rlen = slen; + if (rlen > 10) { + rlen = 10; + } + poly_big_to_fp(rt3, ft + slen - rlen, rlen, slen, logn); + poly_big_to_fp(rt4, gt + slen - rlen, rlen, slen, logn); + + /* + * Values in rt3 and rt4 are downscaled by 2^(scale_fg). + */ + scale_fg = 31 * (int)(slen - rlen); + + /* + * Estimated boundaries for the maximum size (in bits) of the + * coefficients of (f,g). We use the measured average, and + * allow for a deviation of at most six times the standard + * deviation. + */ + minbl_fg = BITLENGTH[depth].avg - 6 * BITLENGTH[depth].std; + maxbl_fg = BITLENGTH[depth].avg + 6 * BITLENGTH[depth].std; + + /* + * Compute 1/(f*adj(f)+g*adj(g)) in rt5. We also keep adj(f) + * and adj(g) in rt3 and rt4, respectively.
+ */ + PQCLEAN_FALCON1024_CLEAN_FFT(rt3, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt4, logn); + PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(rt5, rt3, rt4, logn); + PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(rt3, logn); + PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(rt4, logn); + + /* + * Reduce F and G repeatedly. + * + * The expected maximum bit length of coefficients of F and G + * is kept in maxbl_FG, with the corresponding word length in + * FGlen. + */ + FGlen = llen; + maxbl_FG = 31 * (int)llen; + + /* + * Each reduction operation computes the reduction polynomial + * "k". We need that polynomial to have coefficients that fit + * on 32-bit signed integers, with some scaling; thus, we use + * a descending sequence of scaling values, down to zero. + * + * The size of the coefficients of k is (roughly) the difference + * between the size of the coefficients of (F,G) and the size + * of the coefficients of (f,g). Thus, the maximum size of the + * coefficients of k is, at the start, maxbl_FG - minbl_fg; + * this is our starting scale value for k. + * + * We need to estimate the size of (F,G) during the execution of + * the algorithm; we are allowed some overestimation but not too + * much (poly_big_to_fp() uses a 310-bit window). Generally + * speaking, after applying a reduction with k scaled to + * scale_k, the size of (F,G) will be size(f,g) + scale_k + dd, + * where 'dd' is a few bits to account for the fact that the + * reduction is never perfect (intuitively, dd is on the order + * of sqrt(N), so at most 5 bits; we here allow for 10 extra + * bits). + * + * The size of (f,g) is not known exactly, but maxbl_fg is an + * upper bound. + */ + scale_k = maxbl_FG - minbl_fg; + + for (;;) { + int scale_FG, dc, new_maxbl_FG; + uint32_t scl, sch; + fpr pdc, pt; + + /* + * Convert current F and G into floating-point. We apply + * scaling if the current length is more than 10 words.
+ */ + rlen = FGlen; + if (rlen > 10) { + rlen = 10; + } + scale_FG = 31 * (int)(FGlen - rlen); + poly_big_to_fp(rt1, Ft + FGlen - rlen, rlen, llen, logn); + poly_big_to_fp(rt2, Gt + FGlen - rlen, rlen, llen, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) in rt2. + */ + PQCLEAN_FALCON1024_CLEAN_FFT(rt1, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt2, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(rt1, rt3, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(rt2, rt4, logn); + PQCLEAN_FALCON1024_CLEAN_poly_add(rt2, rt1, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(rt2, rt5, logn); + PQCLEAN_FALCON1024_CLEAN_iFFT(rt2, logn); + + /* + * (f,g) are scaled by 'scale_fg', meaning that the + * numbers in rt3/rt4 should be multiplied by 2^(scale_fg) + * to have their true mathematical value. + * + * (F,G) are similarly scaled by 'scale_FG'. Therefore, + * the value we computed in rt2 is scaled by + * 'scale_FG-scale_fg'. + * + * We want that value to be scaled by 'scale_k', hence we + * apply a corrective scaling. After scaling, the values + * should fit in -2^31-1..+2^31-1 (the conversion loop + * below rejects any value that is not strictly between + * fpr_mtwo31m1 and fpr_ptwo31m1). + */ + dc = scale_k - scale_FG + scale_fg; + + /* + * We will need to multiply values by 2^(-dc). The value + * 'dc' is not secret, so we can compute 2^(-dc) with a + * non-constant-time process. + * (We could use ldexp(), but we prefer to avoid any + * dependency on libm. When using FP emulation, we could + * use our fpr_ldexp(), which is constant-time.) + * + * The loop below is a square-and-multiply exponentiation: + * the base is 2 when dc is negative and 1/2 otherwise, + * and the exponent is the absolute value of dc. + */ + if (dc < 0) { + dc = -dc; + pt = fpr_two; + } else { + pt = fpr_onehalf; + } + pdc = fpr_one; + while (dc != 0) { + if ((dc & 1) != 0) { + pdc = fpr_mul(pdc, pt); + } + dc >>= 1; + pt = fpr_sqr(pt); + } + + for (u = 0; u < n; u ++) { + fpr xv; + + xv = fpr_mul(rt2[u], pdc); + + /* + * Sometimes the values can be out-of-bounds if + * the algorithm fails; we must not call + * fpr_rint() (and cast to int32_t) if the value + * is not in-bounds.
Note that the test does not + * break constant-time discipline, since any + * failure here implies that we discard the current + * secret key (f,g). + */ + if (!fpr_lt(fpr_mtwo31m1, xv) + || !fpr_lt(xv, fpr_ptwo31m1)) { + return 0; + } + k[u] = (int32_t)fpr_rint(xv); + } + + /* + * Values in k[] are integers. They really are scaled + * down by scale_k bits (scale_k is maxbl_FG - minbl_fg on + * the first iteration only); sch and scl below are + * scale_k split into whole 31-bit words and remaining bits. + * + * If we are at low depth (depth <= DEPTH_INT_FG), then we + * use the NTT to compute k*f and k*g. + */ + sch = (uint32_t)(scale_k / 31); + scl = (uint32_t)(scale_k % 31); + if (depth <= DEPTH_INT_FG) { + poly_sub_scaled_ntt(Ft, FGlen, llen, ft, slen, slen, + k, sch, scl, logn, t1); + poly_sub_scaled_ntt(Gt, FGlen, llen, gt, slen, slen, + k, sch, scl, logn, t1); + } else { + poly_sub_scaled(Ft, FGlen, llen, ft, slen, slen, + k, sch, scl, logn); + poly_sub_scaled(Gt, FGlen, llen, gt, slen, slen, + k, sch, scl, logn); + } + + /* + * We compute the new maximum size of (F,G), assuming that + * (f,g) has _maximal_ length (i.e. that reduction is + * "late" instead of "early"). We also adjust FGlen + * accordingly. + */ + new_maxbl_FG = scale_k + maxbl_fg + 10; + if (new_maxbl_FG < maxbl_FG) { + maxbl_FG = new_maxbl_FG; + if ((int)FGlen * 31 >= maxbl_FG + 31) { + FGlen --; + } + } + + /* + * We suppose that scaling down achieves a reduction by + * at least 25 bits per iteration. We stop when we have + * done the loop with an unscaled k. + */ + if (scale_k <= 0) { + break; + } + scale_k -= 25; + if (scale_k < 0) { + scale_k = 0; + } + } + + /* + * If (F,G) length was lowered below 'slen', then we must take + * care to re-extend the sign. + */ + if (FGlen < slen) { + for (u = 0; u < n; u ++, Ft += llen, Gt += llen) { + size_t v; + uint32_t sw; + + sw = -(Ft[FGlen - 1] >> 30) >> 1; + for (v = FGlen; v < slen; v ++) { + Ft[v] = sw; + } + sw = -(Gt[FGlen - 1] >> 30) >> 1; + for (v = FGlen; v < slen; v ++) { + Gt[v] = sw; + } + } + } + + /* + * Compress encoding of all values to 'slen' words (this is the + * expected output format).
+ */ + for (u = 0, x = tmp, y = tmp; + u < (n << 1); u ++, x += slen, y += llen) { + memmove(x, y, slen * sizeof * y); + } + return 1; +} + +/* + * Solving the NTRU equation, binary case, depth = 1. Upon entry, the + * F and G from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_binary_depth1(unsigned logn_top, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + /* + * The first half of this function is a copy of the corresponding + * part in solve_NTRU_intermediate(), for the reconstruction of + * the unreduced F and G. The second half (Babai reduction) is + * done differently, because the unreduced F and G fit in 53 bits + * of precision, allowing a much simpler process with lower RAM + * usage. + */ + unsigned depth, logn; + size_t n_top, n, hn, slen, dlen, llen, u; + uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1; + fpr *rt1, *rt2, *rt3, *rt4, *rt5, *rt6; + uint32_t *x, *y; + + depth = 1; + n_top = (size_t)1 << logn_top; + logn = logn_top - depth; + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + */ + + /* + * slen = size for our input f and g; also size of the reduced + * F and G we return (degree N) + * + * dlen = size of the F and G obtained from the deeper level + * (degree N/2) + * + * llen = size for intermediary F and G before reduction (degree N) + * + * We build our non-reduced F and G as two independent halves each, + * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1). + */ + slen = MAX_BL_SMALL[depth]; + dlen = MAX_BL_SMALL[depth + 1]; + llen = MAX_BL_LARGE[depth]; + + /* + * Fd and Gd are the F and G from the deeper level. 
Ft and Gt + * are the destination arrays for the unreduced F and G. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + Ft = Gd + dlen * hn; + Gt = Ft + llen * n; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * Now Fd and Gd are not needed anymore; we can squeeze them out. + */ + memmove(tmp, Ft, llen * n * sizeof(uint32_t)); + Ft = tmp; + memmove(Ft + llen * n, Gt, llen * n * sizeof(uint32_t)); + Gt = Ft + llen * n; + ft = Gt + llen * n; + gt = ft + slen * n; + + t1 = gt + slen * n; + + /* + * Compute our F and G modulo sufficiently many small primes. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + unsigned e; + size_t v; + + /* + * All computations are done modulo p. + */ + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * We recompute things from the source f and g, of full + * degree. However, we will need only the n first elements + * of the inverse NTT table (igm); the call to modp_mkgm() + * below will fill n_top elements in igm[] (thus overflowing + * into fx[]) but later code will overwrite these extra + * elements. + */ + gm = t1; + igm = gm + n_top; + fx = igm + n; + gx = fx + n_top; + modp_mkgm2(gm, igm, logn_top, PRIMES[u].g, p, p0i); + + /* + * Set ft and gt to f and g modulo p, respectively. + */ + for (v = 0; v < n_top; v ++) { + fx[v] = modp_set(f[v], p); + gx[v] = modp_set(g[v], p); + } + + /* + * Convert to NTT and compute our f and g. 
+ */ + modp_NTT2(fx, gm, logn_top, p, p0i); + modp_NTT2(gx, gm, logn_top, p, p0i); + for (e = logn_top; e > logn; e --) { + modp_poly_rec_res(fx, e, p, p0i, R2); + modp_poly_rec_res(gx, e, p, p0i, R2); + } + + /* + * From that point onward, we only need tables for + * degree n, so we can save some space. + */ + if (depth > 0) { /* always true */ + memmove(gm + n, igm, n * sizeof * igm); + igm = gm + n; + memmove(igm + n, fx, n * sizeof * ft); + fx = igm + n; + memmove(fx + n, gx, n * sizeof * gt); + gx = fx + n; + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed + * in a previous step, and stored in Ft and Gt. + */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * Equations are: + * + * f'(x^2) = N(f)(x^2) = f * adj(f) + * g'(x^2) = N(g)(x^2) = g * adj(g) + * + * f'*G' - g'*F' = q + * + * F = F'(x^2) * adj(g) + * G = G'(x^2) * adj(f) + * + * The NTT representation of f is f(w) for all w which + * are roots of phi. In the binary case, as well as in + * the ternary case for all depth except the deepest, + * these roots can be grouped in pairs (w,-w), and we + * then have: + * + * f(w) = adj(f)(-w) + * f(-w) = adj(f)(w) + * + * and w^2 is then a root for phi at the half-degree. + * + * At the deepest level in the ternary case, this still + * holds, in the following sense: the roots of x^2-x+1 + * are (w,-w^2) (for w^3 = -1, and w != -1), and we + * have: + * + * f(w) = adj(f)(-w^2) + * f(-w^2) = adj(f)(w) + * + * In all case, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, the two roots for each pair are consecutive + * in our bit-reversal encoding. 
+ */ + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + + /* + * Also save ft and gt (only up to size slen). + */ + if (u < slen) { + modp_iNTT2(fx, igm, logn, p, p0i); + modp_iNTT2(gx, igm, logn, p, p0i); + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + *x = fx[v]; + *y = gx[v]; + } + } + } + + /* + * Rebuild f, g, F and G with the CRT. Note that the elements of F + * and G are consecutive, and thus can be rebuilt in a single + * loop; similarly, the elements of f and g are consecutive. + */ + zint_rebuild_CRT(Ft, llen, llen, n << 1, PRIMES, 1, t1); + zint_rebuild_CRT(ft, slen, slen, n << 1, PRIMES, 1, t1); + + /* + * Here starts the Babai reduction, specialized for depth = 1. + * + * Candidates F and G (from Ft and Gt), and base f and g (ft and gt), + * are converted to floating point. There is no scaling, and a + * single pass is sufficient. + */ + + /* + * Convert F and G into floating point (rt1 and rt2). + */ + rt1 = align_fpr(tmp, gt + slen * n); + rt2 = rt1 + n; + poly_big_to_fp(rt1, Ft, llen, llen, logn); + poly_big_to_fp(rt2, Gt, llen, llen, logn); + + /* + * Integer representation of F and G is no longer needed, we + * can remove it. 
+ */ + memmove(tmp, ft, 2 * slen * n * sizeof * ft); + ft = tmp; + gt = ft + slen * n; + rt3 = align_fpr(tmp, gt + slen * n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * Convert f and g into floating point (rt3 and rt4). + */ + poly_big_to_fp(rt3, ft, slen, slen, logn); + poly_big_to_fp(rt4, gt, slen, slen, logn); + + /* + * Remove unneeded ft and gt. + */ + memmove(tmp, rt1, 4 * n * sizeof * rt1); + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * We now have: + * rt1 = F + * rt2 = G + * rt3 = f + * rt4 = g + * in that order in RAM. We convert all of them to FFT. + */ + PQCLEAN_FALCON1024_CLEAN_FFT(rt1, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt2, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt3, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt4, logn); + + /* + * Compute: + * rt5 = F*adj(f) + G*adj(g) + * rt6 = 1 / (f*adj(f) + g*adj(g)) + * (Note that rt6 is half-length.) + */ + rt5 = rt4 + n; + rt6 = rt5 + n; + PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(rt5, rt1, rt2, rt3, rt4, logn); + PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(rt6, rt3, rt4, logn); + + /* + * Compute: + * rt5 = (F*adj(f)+G*adj(g)) / (f*adj(f)+g*adj(g)) + */ + PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(rt5, rt6, logn); + + /* + * Compute k as the rounded version of rt5. Check that none of + * the values is larger than 2^63-1 (in absolute value) + * because that would make the fpr_rint() do something undefined; + * note that any out-of-bounds value here implies a failure and + * (f,g) will be discarded, so we can make a simple test. + */ + PQCLEAN_FALCON1024_CLEAN_iFFT(rt5, logn); + for (u = 0; u < n; u ++) { + fpr z; + + z = rt5[u]; + if (!fpr_lt(z, fpr_ptwo63m1) || !fpr_lt(fpr_mtwo63m1, z)) { + return 0; + } + rt5[u] = fpr_of(fpr_rint(z)); + } + PQCLEAN_FALCON1024_CLEAN_FFT(rt5, logn); + + /* + * Subtract k*f from F, and k*g from G. 
+ */ + PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(rt3, rt5, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(rt4, rt5, logn); + PQCLEAN_FALCON1024_CLEAN_poly_sub(rt1, rt3, logn); + PQCLEAN_FALCON1024_CLEAN_poly_sub(rt2, rt4, logn); + PQCLEAN_FALCON1024_CLEAN_iFFT(rt1, logn); + PQCLEAN_FALCON1024_CLEAN_iFFT(rt2, logn); + + /* + * Convert back F and G to integers, and return. + */ + Ft = tmp; + Gt = Ft + n; + rt3 = align_fpr(tmp, Gt + n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + for (u = 0; u < n; u ++) { + Ft[u] = (uint32_t)fpr_rint(rt1[u]); + Gt[u] = (uint32_t)fpr_rint(rt2[u]); + } + + return 1; +} + +/* + * Solving the NTRU equation, top level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_binary_depth0(unsigned logn, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t n, hn, u; + uint32_t p, p0i, R2; + uint32_t *Fp, *Gp, *t1, *t2, *t3, *t4, *t5; + uint32_t *gm, *igm, *ft, *gt; + fpr *rt2, *rt3; + + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + * + * Everything should fit in 31-bit integers, hence we can just use + * the first small prime p = 2147473409. + */ + p = PRIMES[0].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + Fp = tmp; + Gp = Fp + hn; + ft = Gp + hn; + gt = ft + n; + gm = gt + n; + igm = gm + n; + + modp_mkgm2(gm, igm, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F' and G' to NTT representation. 
+ */ + for (u = 0; u < hn; u ++) { + Fp[u] = modp_set(zint_one_to_plain(Fp + u), p); + Gp[u] = modp_set(zint_one_to_plain(Gp + u), p); + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Load f and g and convert them to NTT representation. + */ + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + + /* + * Build the unreduced F,G in ft and gt. + */ + for (u = 0; u < n; u += 2) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = ft[u + 0]; + ftB = ft[u + 1]; + gtA = gt[u + 0]; + gtB = gt[u + 1]; + mFp = modp_montymul(Fp[u >> 1], R2, p, p0i); + mGp = modp_montymul(Gp[u >> 1], R2, p, p0i); + ft[u + 0] = modp_montymul(gtB, mFp, p, p0i); + ft[u + 1] = modp_montymul(gtA, mFp, p, p0i); + gt[u + 0] = modp_montymul(ftB, mGp, p, p0i); + gt[u + 1] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2(ft, igm, logn, p, p0i); + modp_iNTT2(gt, igm, logn, p, p0i); + + Gp = Fp + n; + t1 = Gp + n; + memmove(Fp, ft, 2 * n * sizeof * ft); + + /* + * We now need to apply the Babai reduction. At that point, + * we have F and G in two n-word arrays. + * + * We can compute F*adj(f)+G*adj(g) and f*adj(f)+g*adj(g) + * modulo p, using the NTT. We still move memory around in + * order to save RAM. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + + /* + * Compute the NTT tables in t1 and t2. We do not keep t2 + * (we'll recompute it later on). + */ + modp_mkgm2(t1, t2, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F and G to NTT. + */ + modp_NTT2(Fp, t1, logn, p, p0i); + modp_NTT2(Gp, t1, logn, p, p0i); + + /* + * Load f and adj(f) in t4 and t5, and convert them to NTT + * representation. 
+ */ + t4[0] = t5[0] = modp_set(f[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[n - u] = modp_set(-f[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Compute F*adj(f) in t2, and f*adj(f) in t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_montymul(w, Fp[u], p, p0i); + t3[u] = modp_montymul(w, t4[u], p, p0i); + } + + /* + * Load g and adj(g) in t4 and t5, and convert them to NTT + * representation. + */ + t4[0] = t5[0] = modp_set(g[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(g[u], p); + t5[n - u] = modp_set(-g[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Add G*adj(g) to t2, and g*adj(g) to t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_add(t2[u], + modp_montymul(w, Gp[u], p, p0i), p); + t3[u] = modp_add(t3[u], + modp_montymul(w, t4[u], p, p0i), p); + } + + /* + * Convert back t2 and t3 to normal representation (normalized + * around 0), and then + * move them to t1 and t2. We first need to recompute the + * inverse table for NTT. + */ + modp_mkgm2(t1, t4, logn, PRIMES[0].g, p, p0i); + modp_iNTT2(t2, t4, logn, p, p0i); + modp_iNTT2(t3, t4, logn, p, p0i); + for (u = 0; u < n; u ++) { + t1[u] = (uint32_t)modp_norm(t2[u], p); + t2[u] = (uint32_t)modp_norm(t3[u], p); + } + + /* + * At that point, array contents are: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * F*adj(f)+G*adj(g) (t1) + * f*adj(f)+g*adj(g) (t2) + * + * We want to divide t1 by t2. The result is not integral; it + * must be rounded. We thus need to use the FFT. + */ + + /* + * Get f*adj(f)+g*adj(g) in FFT representation. Since this + * polynomial is auto-adjoint, all its coordinates in FFT + * representation are actually real, so we can truncate off + * the imaginary parts. 
+ */ + rt3 = align_fpr(tmp, t3); + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t2)[u]); + } + PQCLEAN_FALCON1024_CLEAN_FFT(rt3, logn); + rt2 = align_fpr(tmp, t2); + memmove(rt2, rt3, hn * sizeof * rt3); + + /* + * Convert F*adj(f)+G*adj(g) in FFT representation. + */ + rt3 = rt2 + hn; + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t1)[u]); + } + PQCLEAN_FALCON1024_CLEAN_FFT(rt3, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) and get + * its rounded normal representation in t1. + */ + PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(rt3, rt2, logn); + PQCLEAN_FALCON1024_CLEAN_iFFT(rt3, logn); + for (u = 0; u < n; u ++) { + t1[u] = modp_set((int32_t)fpr_rint(rt3[u]), p); + } + + /* + * RAM contents are now: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * k (t1) + * + * We want to compute F-k*f, and G-k*g. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + modp_mkgm2(t2, t3, logn, PRIMES[0].g, p, p0i); + for (u = 0; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[u] = modp_set(g[u], p); + } + modp_NTT2(t1, t2, logn, p, p0i); + modp_NTT2(t4, t2, logn, p, p0i); + modp_NTT2(t5, t2, logn, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t kw; + + kw = modp_montymul(t1[u], R2, p, p0i); + Fp[u] = modp_sub(Fp[u], + modp_montymul(kw, t4[u], p, p0i), p); + Gp[u] = modp_sub(Gp[u], + modp_montymul(kw, t5[u], p, p0i), p); + } + modp_iNTT2(Fp, t3, logn, p, p0i); + modp_iNTT2(Gp, t3, logn, p, p0i); + for (u = 0; u < n; u ++) { + Fp[u] = (uint32_t)modp_norm(Fp[u], p); + Gp[u] = (uint32_t)modp_norm(Gp[u], p); + } + + return 1; +} + +/* + * Solve the NTRU equation. Returned value is 1 on success, 0 on error. + * G can be NULL, in which case that value is computed but not returned. + * If any of the coefficients of F and G exceeds lim (in absolute value), + * then 0 is returned. 
+ */ +static int +solve_NTRU(unsigned logn, int8_t *F, int8_t *G, + const int8_t *f, const int8_t *g, int lim, uint32_t *tmp) { + size_t n, u; + uint32_t *ft, *gt, *Ft, *Gt, *gm; + uint32_t p, p0i, r; + const small_prime *primes; + + n = MKN(logn); + + if (!solve_NTRU_deepest(logn, f, g, tmp)) { + return 0; + } + + /* + * For logn <= 2, we need to use solve_NTRU_intermediate() + * directly, because coefficients are a bit too large and + * do not fit the hypotheses in solve_NTRU_binary_depth0(). + */ + if (logn <= 2) { + unsigned depth; + + depth = logn; + while (depth -- > 0) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + } else { + unsigned depth; + + depth = logn; + while (depth -- > 2) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + if (!solve_NTRU_binary_depth1(logn, f, g, tmp)) { + return 0; + } + if (!solve_NTRU_binary_depth0(logn, f, g, tmp)) { + return 0; + } + } + + /* + * If no buffer has been provided for G, use a temporary one. + */ + if (G == NULL) { + G = (int8_t *)(tmp + 2 * n); + } + + /* + * Final F and G are in fk->tmp, one word per coefficient + * (signed value over 31 bits). + */ + if (!poly_big_to_small(F, tmp, lim, logn) + || !poly_big_to_small(G, tmp + n, lim, logn)) { + return 0; + } + + /* + * Verify that the NTRU equation is fulfilled. Since all elements + * have short lengths, verifying modulo a small prime p works, and + * allows using the NTT. + * + * We put Gt[] first in tmp[], and process it first, so that it does + * not overlap with G[] in case we allocated it ourselves. 
+ */ + Gt = tmp; + ft = Gt + n; + gt = ft + n; + Ft = gt + n; + gm = Ft + n; + + primes = PRIMES; + p = primes[0].p; + p0i = modp_ninv31(p); + modp_mkgm2(gm, tmp, logn, primes[0].g, p, p0i); + for (u = 0; u < n; u ++) { + Gt[u] = modp_set(G[u], p); + } + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + Ft[u] = modp_set(F[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + modp_NTT2(Ft, gm, logn, p, p0i); + modp_NTT2(Gt, gm, logn, p, p0i); + r = modp_montymul(12289, 1, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t z; + + z = modp_sub(modp_montymul(ft[u], Gt[u], p, p0i), + modp_montymul(gt[u], Ft[u], p, p0i), p); + if (z != r) { + return 0; + } + } + + return 1; +} + +/* + * Generate a random polynomial with a Gaussian distribution. This function + * also makes sure that the resultant of the polynomial with phi is odd. + */ +static void +poly_small_mkgauss(RNG_CONTEXT *rng, int8_t *f, unsigned logn) { + size_t n, u; + unsigned mod2; + + n = MKN(logn); + mod2 = 0; + for (u = 0; u < n; u ++) { + int s; + +restart: + s = mkgauss(rng, logn); + + /* + * We need the coefficient to fit within -127..+127; + * realistically, this is always the case except for + * the very low degrees (N = 2 or 4), for which there + * is no real security anyway. + */ + if (s < -127 || s > 127) { + goto restart; + } + + /* + * We need the sum of all coefficients to be 1; otherwise, + * the resultant of the polynomial with X^N+1 will be even, + * and the binary GCD will fail. + */ + if (u == n - 1) { + if ((mod2 ^ (unsigned)(s & 1)) == 0) { + goto restart; + } + } else { + mod2 ^= (unsigned)(s & 1); + } + f[u] = (int8_t)s; + } +} + +/* see falcon.h */ +void +PQCLEAN_FALCON1024_CLEAN_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp) { + /* + * Algorithm is the following: + * + * - Generate f and g with the Gaussian distribution. 
+ * + * - If either Res(f,phi) or Res(g,phi) is even, try again. + * + * - If ||(f,g)|| is too large, try again. + * + * - If ||B~_{f,g}|| is too large, try again. + * + * - If f is not invertible mod phi mod q, try again. + * + * - Compute h = g/f mod phi mod q. + * + * - Solve the NTRU equation fG - gF = q; if the solving fails, + * try again. Usual failure condition is when Res(f,phi) + * and Res(g,phi) are not prime to each other. + */ + size_t n, u; + uint16_t *h2, *tmp2; + RNG_CONTEXT *rc; + + n = MKN(logn); + rc = rng; + + /* + * We need to generate f and g randomly, until we find values + * such that the norm of (g,-f), and of the orthogonalized + * vector, are satisfying. The orthogonalized vector is: + * (q*adj(f)/(f*adj(f)+g*adj(g)), q*adj(g)/(f*adj(f)+g*adj(g))) + * (it is actually the (N+1)-th row of the Gram-Schmidt basis). + * + * In the binary case, coefficients of f and g are generated + * independently of each other, with a discrete Gaussian + * distribution of standard deviation 1.17*sqrt(q/(2*N)). Then, + * the two vectors have expected norm 1.17*sqrt(q), which is + * also our acceptance bound: we require both vectors to be no + * larger than that (this will be satisfied about 1/4th of the + * time, thus we expect sampling new (f,g) about 4 times for that + * step). + * + * We require that Res(f,phi) and Res(g,phi) are both odd (the + * NTRU equation solver requires it). + */ + for (;;) { + fpr *rt1, *rt2, *rt3; + fpr bnorm; + uint32_t normf, normg, norm; + int lim; + + /* + * The poly_small_mkgauss() function makes sure + * that the sum of coefficients is 1 modulo 2 + * (i.e. the resultant of the polynomial with phi + * will be odd). + */ + poly_small_mkgauss(rc, f, logn); + poly_small_mkgauss(rc, g, logn); + + /* + * Verify that all coefficients are within the bounds + * defined in max_fg_bits. This is the case with + * overwhelming probability; this guarantees that the + * key will be encodable with FALCON_COMP_TRIM. 
+ */ + lim = 1 << (PQCLEAN_FALCON1024_CLEAN_max_fg_bits[logn] - 1); + for (u = 0; u < n; u ++) { + /* + * We can use non-CT tests since on any failure + * we will discard f and g. + */ + if (f[u] >= lim || f[u] <= -lim + || g[u] >= lim || g[u] <= -lim) { + lim = -1; + break; + } + } + if (lim < 0) { + continue; + } + + /* + * Bound is 1.17*sqrt(q). We compute the squared + * norms. With q = 12289, the squared bound is: + * (1.17^2)* 12289 = 16822.4121 + * Since f and g are integral, the squared norm + * of (g,-f) is an integer. + */ + normf = poly_small_sqnorm(f, logn); + normg = poly_small_sqnorm(g, logn); + norm = (normf + normg) | -((normf | normg) >> 31); + if (norm >= 16823) { + continue; + } + + /* + * We compute the orthogonalized vector norm. + */ + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + poly_small_to_fp(rt1, f, logn); + poly_small_to_fp(rt2, g, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt1, logn); + PQCLEAN_FALCON1024_CLEAN_FFT(rt2, logn); + PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(rt3, rt1, rt2, logn); + PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(rt1, logn); + PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(rt2, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mulconst(rt1, fpr_q, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mulconst(rt2, fpr_q, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(rt1, rt3, logn); + PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(rt2, rt3, logn); + PQCLEAN_FALCON1024_CLEAN_iFFT(rt1, logn); + PQCLEAN_FALCON1024_CLEAN_iFFT(rt2, logn); + bnorm = fpr_zero; + for (u = 0; u < n; u ++) { + bnorm = fpr_add(bnorm, fpr_sqr(rt1[u])); + bnorm = fpr_add(bnorm, fpr_sqr(rt2[u])); + } + if (!fpr_lt(bnorm, fpr_bnorm_max)) { + continue; + } + + /* + * Compute public key h = g/f mod X^N+1 mod q. If this + * fails, we must restart. 
+ */ + if (h == NULL) { + h2 = (uint16_t *)tmp; + tmp2 = h2 + n; + } else { + h2 = h; + tmp2 = (uint16_t *)tmp; + } + if (!PQCLEAN_FALCON1024_CLEAN_compute_public(h2, f, g, logn, (uint8_t *)tmp2)) { + continue; + } + + /* + * Solve the NTRU equation to get F and G. + */ + lim = (1 << (PQCLEAN_FALCON1024_CLEAN_max_FG_bits[logn] - 1)) - 1; + if (!solve_NTRU(logn, F, G, f, g, lim, (uint32_t *)tmp)) { + continue; + } + + /* + * Key pair is generated. + */ + break; + } +} diff --git a/crypto_sign/falcon/falcon-1024/clean/pqclean.c b/crypto_sign/falcon/falcon-1024/clean/pqclean.c new file mode 100644 index 00000000..487efd52 --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/pqclean.c @@ -0,0 +1,386 @@ +#include "api.h" +#include "inner.h" +#include "randombytes.h" +#include +#include +/* + * Wrapper for implementing the PQClean API. + */ + + + +#define NONCELEN 40 +#define SEEDLEN 48 + +/* + * Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) + * + * private key: + * header byte: 0101nnnn + * private f (6 or 5 bits by element, depending on degree) + * private g (6 or 5 bits by element, depending on degree) + * private F (8 bits by element) + * + * public key: + * header byte: 0000nnnn + * public h (14 bits by element) + * + * signature: + * header byte: 0011nnnn + * nonce 40 bytes + * value (12 bits by element) + * + * message + signature: + * signature length (2 bytes, big-endian) + * nonce 40 bytes + * message + * header byte: 0010nnnn + * value (12 bits by element) + * (signature length is 1+len(value), not counting the nonce) + */ + +/* see api.h */ +int +PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + union { + uint8_t b[28 * 1024]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[1024], g[1024], F[1024], G[1024]; + uint16_t h[1024]; + unsigned char seed[SEEDLEN]; + inner_shake256_context rng; + size_t u, v; + + + /* + * Generate key pair. 
+ */ + randombytes(seed, sizeof seed); + inner_shake256_init(&rng); + inner_shake256_inject(&rng, seed, sizeof seed); + inner_shake256_flip(&rng); + PQCLEAN_FALCON1024_CLEAN_keygen(&rng, f, g, F, G, h, 10, tmp.b); + inner_shake256_ctx_release(&rng); + + /* + * Encode private key. + */ + sk[0] = 0x50 + 10; + u = 1; + v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( + sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, + f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( + sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, + g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( + sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, + F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10]); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { + return -1; + } + + /* + * Encode public key. + */ + pk[0] = 0x00 + 10; + v = PQCLEAN_FALCON1024_CLEAN_modq_encode( + pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, + h, 10); + if (v != PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + + return 0; +} + +/* + * Compute the signature. nonce[] receives the nonce and must have length + * NONCELEN bytes. sigbuf[] receives the signature value (without nonce + * or header byte), with *sigbuflen providing the maximum value length and + * receiving the actual value length. + * + * If a signature could be computed but not encoded because it would + * exceed the output buffer size, then a new signature is computed. If + * the provided buffer size is too low, this could loop indefinitely, so + * the caller must provide a size that can accommodate signatures with a + * large enough probability. + * + * Return value: 0 on success, -1 on error. 
+ */ +static int +do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + union { + uint8_t b[72 * 1024]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[1024], g[1024], F[1024], G[1024]; + union { + int16_t sig[1024]; + uint16_t hm[1024]; + } r; + unsigned char seed[SEEDLEN]; + inner_shake256_context sc; + size_t u, v; + + /* + * Decode the private key. + */ + if (sk[0] != 0x50 + 10) { + return -1; + } + u = 1; + v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( + f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], + sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( + g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], + sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( + F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10], + sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { + return -1; + } + if (!PQCLEAN_FALCON1024_CLEAN_complete_private(G, f, g, F, 10, tmp.b)) { + return -1; + } + + + /* + * Create a random nonce (40 bytes). + */ + randombytes(nonce, NONCELEN); + + /* + * Hash message nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(&sc, r.hm, 10); + inner_shake256_ctx_release(&sc); + + /* + * Initialize a RNG. + */ + randombytes(seed, sizeof seed); + inner_shake256_init(&sc); + inner_shake256_inject(&sc, seed, sizeof seed); + inner_shake256_flip(&sc); + + /* + * Compute and return the signature. This loops until a signature + * value is found that fits in the provided buffer. 
+ */ + for (;;) { + PQCLEAN_FALCON1024_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b); + v = PQCLEAN_FALCON1024_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 10); + if (v != 0) { + inner_shake256_ctx_release(&sc); + *sigbuflen = v; + return 0; + } + } +} + +/* + * Verify a signature. The nonce has size NONCELEN bytes. sigbuf[] + * (of size sigbuflen) contains the signature value, not including the + * header byte or nonce. Return value is 0 on success, -1 on error. + */ +static int +do_verify( + const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + union { + uint8_t b[2 * 1024]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + uint16_t h[1024], hm[1024]; + int16_t sig[1024]; + inner_shake256_context sc; + + /* + * Decode public key. + */ + if (pk[0] != 0x00 + 10) { + return -1; + } + if (PQCLEAN_FALCON1024_CLEAN_modq_decode(h, 10, + pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) + != PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(h, 10); + + /* + * Decode signature. + */ + if (sigbuflen == 0) { + return -1; + } + if (PQCLEAN_FALCON1024_CLEAN_comp_decode(sig, 10, sigbuf, sigbuflen) != sigbuflen) { + return -1; + } + + /* + * Hash nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(&sc, hm, 10, tmp.b); + inner_shake256_ctx_release(&sc); + + /* + * Verify signature. 
+ */ + if (!PQCLEAN_FALCON1024_CLEAN_verify_raw(hm, sig, h, 10, tmp.b)) { + return -1; + } + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + /* + * The PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES constant is used for + * the signed message object (as produced by PQCLEAN_FALCON1024_CLEAN_crypto_sign()) + * and includes a two-byte length value, so we take care here + * to only generate signatures that are two bytes shorter than + * the maximum. This is done to ensure that PQCLEAN_FALCON1024_CLEAN_crypto_sign() + * and PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature() produce the exact same signature + * value, if used on the same message, with the same private key, + * and using the same output from randombytes() (this is for + * reproducibility of tests). + */ + size_t vlen; + + vlen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { + return -1; + } + sig[0] = 0x30 + 10; + *siglen = 1 + NONCELEN + vlen; + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen < 1 + NONCELEN) { + return -1; + } + if (sig[0] != 0x30 + 10) { + return -1; + } + return do_verify(sig + 1, + sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + uint8_t *pm, *sigbuf; + size_t sigbuflen; + + /* + * Move the message to its final location; this is a memmove() so + * it handles overlaps properly. 
+ */ + memmove(sm + 2 + NONCELEN, m, mlen); + pm = sm + 2 + NONCELEN; + sigbuf = pm + 1 + mlen; + sigbuflen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { + return -1; + } + pm[mlen] = 0x20 + 10; + sigbuflen ++; + sm[0] = (uint8_t)(sigbuflen >> 8); + sm[1] = (uint8_t)sigbuflen; + *smlen = mlen + 2 + NONCELEN + sigbuflen; + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON1024_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + const uint8_t *sigbuf; + size_t pmlen, sigbuflen; + + if (smlen < 3 + NONCELEN) { + return -1; + } + sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; + if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { + return -1; + } + sigbuflen --; + pmlen = smlen - NONCELEN - 3 - sigbuflen; + if (sm[2 + NONCELEN + pmlen] != 0x20 + 10) { + return -1; + } + sigbuf = sm + 2 + NONCELEN + pmlen + 1; + + /* + * The 2-byte length header and the one-byte signature header + * have been verified. Nonce is at sm+2, followed by the message + * itself. Message length is in pmlen. sigbuf/sigbuflen point to + * the signature value (excluding the header byte). + */ + if (do_verify(sm + 2, sigbuf, sigbuflen, + sm + 2 + NONCELEN, pmlen, pk) < 0) { + return -1; + } + + /* + * Signature is correct, we just have to copy/move the message + * to its final destination. The memmove() properly handles + * overlaps. + */ + memmove(m, sm + 2 + NONCELEN, pmlen); + *mlen = pmlen; + return 0; +} diff --git a/crypto_sign/falcon/falcon-1024/clean/rng.c b/crypto_sign/falcon/falcon-1024/clean/rng.c new file mode 100644 index 00000000..f5739a8f --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/rng.c @@ -0,0 +1,201 @@ +#include "inner.h" +#include +/* + * PRNG and interface to the system RNG. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + + +/* + * Include relevant system header files. For Win32, this will also need + * linking with advapi32.dll, which we trigger with an appropriate #pragma. + */ + +/* see inner.h */ +int +PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t len) { + (void)seed; + if (len == 0) { + return 1; + } + return 0; +} + +/* see inner.h */ +void +PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src) { + /* + * To ensure reproducibility for a given seed, we + * must enforce little-endian interpretation of + * the state words. 
+ */ + uint8_t tmp[56]; + uint64_t th, tl; + int i; + + inner_shake256_extract(src, tmp, 56); + for (i = 0; i < 14; i ++) { + uint32_t w; + + w = (uint32_t)tmp[(i << 2) + 0] + | ((uint32_t)tmp[(i << 2) + 1] << 8) + | ((uint32_t)tmp[(i << 2) + 2] << 16) + | ((uint32_t)tmp[(i << 2) + 3] << 24); + *(uint32_t *)(p->state.d + (i << 2)) = w; + } + tl = *(uint32_t *)(p->state.d + 48); + th = *(uint32_t *)(p->state.d + 52); + *(uint64_t *)(p->state.d + 48) = tl + (th << 32); + PQCLEAN_FALCON1024_CLEAN_prng_refill(p); +} + +/* + * PRNG based on ChaCha20. + * + * State consists in key (32 bytes) then IV (16 bytes) and block counter + * (8 bytes). Normally, we should not care about local endianness (this + * is for a PRNG), but for the NIST competition we need reproducible KAT + * vectors that work across architectures, so we enforce little-endian + * interpretation where applicable. Moreover, output words are "spread + * out" over the output buffer with the interleaving pattern that is + * naturally obtained from the AVX2 implementation that runs eight + * ChaCha20 instances in parallel. + * + * The block counter is XORed into the first 8 bytes of the IV. + */ +void +PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p) { + + static const uint32_t CW[] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + }; + + uint64_t cc; + size_t u; + + /* + * State uses local endianness. Only the output bytes must be + * converted to little endian (if used on a big-endian machine). 
+ */ + cc = *(uint64_t *)(p->state.d + 48); + for (u = 0; u < 8; u ++) { + uint32_t state[16]; + size_t v; + int i; + + memcpy(&state[0], CW, sizeof CW); + memcpy(&state[4], p->state.d, 48); + state[14] ^= (uint32_t)cc; + state[15] ^= (uint32_t)(cc >> 32); + for (i = 0; i < 10; i ++) { + +#define QROUND(a, b, c, d) do { \ + state[a] += state[b]; \ + state[d] ^= state[a]; \ + state[d] = (state[d] << 16) | (state[d] >> 16); \ + state[c] += state[d]; \ + state[b] ^= state[c]; \ + state[b] = (state[b] << 12) | (state[b] >> 20); \ + state[a] += state[b]; \ + state[d] ^= state[a]; \ + state[d] = (state[d] << 8) | (state[d] >> 24); \ + state[c] += state[d]; \ + state[b] ^= state[c]; \ + state[b] = (state[b] << 7) | (state[b] >> 25); \ + } while (0) + + QROUND( 0, 4, 8, 12); + QROUND( 1, 5, 9, 13); + QROUND( 2, 6, 10, 14); + QROUND( 3, 7, 11, 15); + QROUND( 0, 5, 10, 15); + QROUND( 1, 6, 11, 12); + QROUND( 2, 7, 8, 13); + QROUND( 3, 4, 9, 14); + +#undef QROUND + + } + + for (v = 0; v < 4; v ++) { + state[v] += CW[v]; + } + for (v = 4; v < 14; v ++) { + state[v] += ((uint32_t *)p->state.d)[v - 4]; + } + state[14] += ((uint32_t *)p->state.d)[10] + ^ (uint32_t)cc; + state[15] += ((uint32_t *)p->state.d)[11] + ^ (uint32_t)(cc >> 32); + cc ++; + + /* + * We mimic the interleaving that is used in the AVX2 + * implementation. 
+ */ + for (v = 0; v < 16; v ++) { + p->buf.d[(u << 2) + (v << 5) + 0] = + (uint8_t)state[v]; + p->buf.d[(u << 2) + (v << 5) + 1] = + (uint8_t)(state[v] >> 8); + p->buf.d[(u << 2) + (v << 5) + 2] = + (uint8_t)(state[v] >> 16); + p->buf.d[(u << 2) + (v << 5) + 3] = + (uint8_t)(state[v] >> 24); + } + } + *(uint64_t *)(p->state.d + 48) = cc; + + + p->ptr = 0; +} + +/* see inner.h: copy len bytes of PRNG output into dst, starting at the current read offset p->ptr and refilling the buffer each time it is exhausted */ +void +PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { + uint8_t *buf; + + buf = dst; + while (len > 0) { + size_t clen; + + clen = (sizeof p->buf.d) - p->ptr; + if (clen > len) { + clen = len; + } + memcpy(buf, p->buf.d + p->ptr, clen); /* read from the current offset, not from the start of the buffer */ + buf += clen; + len -= clen; + p->ptr += clen; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON1024_CLEAN_prng_refill(p); + } + } +} diff --git a/crypto_sign/falcon/falcon-1024/clean/sign.c b/crypto_sign/falcon/falcon-1024/clean/sign.c new file mode 100644 index 00000000..fb05cdad --- /dev/null +++ b/crypto_sign/falcon/falcon-1024/clean/sign.c @@ -0,0 +1,1254 @@ +#include "inner.h" + +/* + * Falcon signature generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author Thomas Pornin
 */


/* =================================================================== */

/*
 * Compute degree N from logarithm 'logn'.
 * The result has type size_t.
 */
#define MKN(logn)   ((size_t)1 << (logn))

/* =================================================================== */
/*
 * Binary case:
 *   N = 2^logn
 *   phi = X^N+1
 */

/*
 * Get the size of the LDL tree for an input with polynomials of size
 * 2^logn. The size is expressed in the number of elements.
 */
static inline unsigned
ffLDL_treesize(unsigned logn) {
    /*
     * For logn = 0 (polynomials are constant), the "tree" is a
     * single element. Otherwise, the tree node has size 2^logn, and
     * has two child trees for size logn-1 each. Thus, treesize s()
     * must fulfill these two relations:
     *
     *   s(0) = 1
     *   s(logn) = (2^logn) + 2*s(logn-1)
     *
     * The closed form (logn+1)*2^logn satisfies both relations.
     */
    return (logn + 1) << logn;
}

/*
 * Inner function for ffLDL_fft(). It expects the matrix to be both
 * auto-adjoint and quasicyclic; also, it uses the source operands
 * as modifiable temporaries.
 *
 * tmp[] must have room for at least one polynomial.
 *
 * Tree layout written by this function: the first n = 2^logn elements
 * of tree[] hold the node value, followed by the first sub-tree (at
 * tree + n) and then the second sub-tree (at
 * tree + n + ffLDL_treesize(logn - 1)).
 */
static void
ffLDL_fft_inner(fpr *tree,
                fpr *g0, fpr *g1, unsigned logn, fpr *tmp) {
    size_t n, hn;

    n = MKN(logn);
    if (n == 1) {
        /* Leaf: the tree element is the single coefficient of g0. */
        tree[0] = g0[0];
        return;
    }
    hn = n >> 1;

    /*
     * The LDL decomposition yields L (which is written in the tree)
     * and the diagonal of D. Since d00 = g0, we just write d11
     * into tmp.
     */
    PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(tmp, tree, g0, g1, g0, logn);

    /*
     * Split d00 (currently in g0) and d11 (currently in tmp). We
     * reuse g0 and g1 as temporary storage spaces:
     *   d00 splits into g1, g1+hn
     *   d11 splits into g0, g0+hn
     *
     * The order of the two calls matters: g0 is read by the first
     * split before being overwritten by the second one.
     */
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(g1, g1 + hn, g0, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(g0, g0 + hn, tmp, logn);

    /*
     * Each split result is the first row of a new auto-adjoint
     * quasicyclic matrix for the next recursive step.
     */
    ffLDL_fft_inner(tree + n,
                    g1, g1 + hn, logn - 1, tmp);
    ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1),
                    g0, g0 + hn, logn - 1, tmp);
}

/*
 * Compute the ffLDL tree of an auto-adjoint matrix G. The matrix
 * is provided as three polynomials (FFT representation).
 *
 * The "tree" array is filled with the computed tree, of size
 * (logn+1)*(2^logn) elements (see ffLDL_treesize()).
 *
 * Input arrays MUST NOT overlap, except possibly the three unmodified
 * arrays g00, g01 and g11. tmp[] should have room for at least three
 * polynomials of 2^logn elements each.
 *
 * Unlike ffLDL_fft_inner(), this entry point does not modify its
 * g00/g01/g11 inputs: it works on copies held in tmp[].
 */
static void
ffLDL_fft(fpr *tree, const fpr *g00,
          const fpr *g01, const fpr *g11,
          unsigned logn, fpr *tmp) {
    size_t n, hn;
    fpr *d00, *d11;

    n = MKN(logn);
    if (n == 1) {
        tree[0] = g00[0];
        return;
    }
    hn = n >> 1;
    /* tmp[] layout: d00 (n elements), d11 (n elements), scratch. */
    d00 = tmp;
    d11 = tmp + n;
    tmp += n << 1;

    memcpy(d00, g00, n * sizeof * g00);
    PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(d11, tree, g00, g01, g11, logn);

    /*
     * Split d00 into the scratch area, split d11 into the (now free)
     * d00 slot, then move the split d00 into the d11 slot.
     */
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(tmp, tmp + hn, d00, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(d00, d00 + hn, d11, logn);
    memcpy(d11, tmp, n * sizeof * tmp);
    ffLDL_fft_inner(tree + n,
                    d11, d11 + hn, logn - 1, tmp);
    ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1),
                    d00, d00 + hn, logn - 1, tmp);
}

/*
 * Normalize an ffLDL tree: each leaf of value x is replaced with
 * sigma / sqrt(x).
 */
static void
ffLDL_binary_normalize(fpr *tree, unsigned logn) {
    /*
     * TODO: make an iterative version.
     */
    size_t n;

    n = MKN(logn);
    if (n == 1) {
        /*
         * We actually store in the tree leaf the inverse of
         * the value mandated by the specification: this
         * saves a division both here and in the sampler.
         */
        tree[0] = fpr_mul(fpr_sqrt(tree[0]), fpr_inv_sigma);
    } else {
        /*
         * Internal node: skip the n node elements and recurse
         * into the two sub-trees; only leaves are modified.
         */
        ffLDL_binary_normalize(tree + n, logn - 1);
        ffLDL_binary_normalize(tree + n + ffLDL_treesize(logn - 1),
                               logn - 1);
    }
}

/* =================================================================== */

/*
 * Convert an integer polynomial (with small values) into the
 * representation with complex numbers.
 *
 * Each of the 2^logn coefficients of t[] is converted with fpr_of()
 * and stored at the same index in r[].
 */
static void
smallints_to_fpr(fpr *r, const int8_t *t, unsigned logn) {
    size_t n, u;

    n = MKN(logn);
    for (u = 0; u < n; u ++) {
        r[u] = fpr_of(t[u]);
    }
}

/*
 * The expanded private key contains:
 *  - The B0 matrix (four elements)
 *  - The ffLDL tree
 *
 * The skoff_*() functions return the offset (counted in fpr elements)
 * of each component within the expanded key: b00, b01, b10 and b11
 * each take 2^logn elements, in that order, and the tree follows.
 */

static inline size_t
skoff_b00(unsigned logn) {
    (void)logn;
    return 0;
}

static inline size_t
skoff_b01(unsigned logn) {
    return MKN(logn);
}

static inline size_t
skoff_b10(unsigned logn) {
    return 2 * MKN(logn);
}

static inline size_t
skoff_b11(unsigned logn) {
    return 3 * MKN(logn);
}

static inline size_t
skoff_tree(unsigned logn) {
    return 4 * MKN(logn);
}

/* see inner.h */
/*
 * Fill expanded_key[] with the B0 matrix (in FFT representation) and
 * the normalized ffLDL tree computed from the private key (f, g, F, G).
 *
 * tmp[] is used as an array of fpr: it receives g00, g01 and g11
 * (one polynomial each), and the area after them is handed to
 * ffLDL_fft() as its scratch buffer.
 * NOTE(review): tmp is cast to fpr *, so it is presumably expected to
 * be suitably aligned by the caller -- confirm against inner.h.
 */
void
PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key,
                                        const int8_t *f, const int8_t *g,
                                        const int8_t *F, const int8_t *G,
                                        unsigned logn, uint8_t *tmp) {
    size_t n;
    fpr *rf, *rg, *rF, *rG;
    fpr *b00, *b01, *b10, *b11;
    fpr *g00, *g01, *g11, *gxx;
    fpr *tree;

    n = MKN(logn);
    b00 = expanded_key + skoff_b00(logn);
    b01 = expanded_key + skoff_b01(logn);
    b10 = expanded_key + skoff_b10(logn);
    b11 = expanded_key + skoff_b11(logn);
    tree = expanded_key + skoff_tree(logn);

    /*
     * We load the private key elements directly into the B0 matrix,
     * since B0 = [[g, -f], [G, -F]].
     */
    rf = b01;
    rg = b00;
    rF = b11;
    rG = b10;

    smallints_to_fpr(rf, f, logn);
    smallints_to_fpr(rg, g, logn);
    smallints_to_fpr(rF, F, logn);
    smallints_to_fpr(rG, G, logn);

    /*
     * Compute the FFT for the key elements, and negate f and F.
     */
    PQCLEAN_FALCON1024_CLEAN_FFT(rf, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(rg, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(rF, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(rG, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_neg(rf, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_neg(rF, logn);

    /*
     * The Gram matrix is G = B·B*. Formulas are:
     *   g00 = b00*adj(b00) + b01*adj(b01)
     *   g01 = b00*adj(b10) + b01*adj(b11)
     *   g10 = b10*adj(b00) + b11*adj(b01)
     *   g11 = b10*adj(b10) + b11*adj(b11)
     *
     * For historical reasons, this implementation uses
     * g00, g01 and g11 (upper triangle).
     */
    g00 = (fpr *)tmp;
    g01 = g00 + n;
    g11 = g01 + n;
    gxx = g11 + n;

    /* g00 <- b00*adj(b00) + b01*adj(b01); gxx holds the second term. */
    memcpy(g00, b00, n * sizeof * b00);
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(g00, logn);
    memcpy(gxx, b01, n * sizeof * b01);
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(gxx, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(g00, gxx, logn);

    /* g01 <- b00*adj(b10) + b01*adj(b11) */
    memcpy(g01, b00, n * sizeof * b00);
    PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(g01, b10, logn);
    memcpy(gxx, b01, n * sizeof * b01);
    PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(gxx, b11, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(g01, gxx, logn);

    /* g11 <- b10*adj(b10) + b11*adj(b11) */
    memcpy(g11, b10, n * sizeof * b10);
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(g11, logn);
    memcpy(gxx, b11, n * sizeof * b11);
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(gxx, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(g11, gxx, logn);

    /*
     * Compute the Falcon tree. gxx is no longer needed as a
     * polynomial and is reused as the scratch area.
     */
    ffLDL_fft(tree, g00, g01, g11, logn, gxx);

    /*
     * Normalize tree.
     */
    ffLDL_binary_normalize(tree, logn);
}

/*
 * Type of an integer sampler callback: ctx is an opaque context, and
 * the two fpr arguments are the center and the value read from the
 * (normalized) tree leaf. See PQCLEAN_FALCON1024_CLEAN_sampler() for
 * the implementation used in this file, which receives 1/sigma as its
 * third argument.
 */
typedef int (*samplerZ)(void *ctx, fpr mu, fpr sigma);

/*
 * Perform Fast Fourier Sampling for target vector t.
 * The Gram matrix
 * is provided (G = [[g00, g01], [adj(g01), g11]]). The sampled vector
 * is written over (t0,t1). The Gram matrix is modified as well. The
 * tmp[] buffer must have room for four polynomials.
 */
static void
ffSampling_fft_dyntree(samplerZ samp, void *samp_ctx,
                       fpr *t0, fpr *t1,
                       fpr *g00, fpr *g01, fpr *g11,
                       unsigned logn, fpr *tmp) {
    size_t n, hn;
    fpr *z0, *z1;

    /*
     * Deepest level: the LDL tree leaf value is just g00 (the
     * array has length only 1 at this point); we normalize it
     * with regards to sigma, then use it for sampling.
     */
    if (logn == 0) {
        fpr leaf;

        leaf = g00[0];
        leaf = fpr_mul(fpr_sqrt(leaf), fpr_inv_sigma);
        t0[0] = fpr_of(samp(samp_ctx, t0[0], leaf));
        t1[0] = fpr_of(samp(samp_ctx, t1[0], leaf));
        return;
    }

    n = (size_t)1 << logn;
    hn = n >> 1;

    /*
     * Decompose G into LDL. We only need d00 (identical to g00),
     * d11, and l10; we do that in place.
     */
    PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(g00, g01, g11, logn);

    /*
     * Split d00 and d11 and expand them into half-size quasi-cyclic
     * Gram matrices. We also save l10 in tmp[].
     */
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(tmp, tmp + hn, g00, logn);
    memcpy(g00, tmp, n * sizeof * tmp);
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(tmp, tmp + hn, g11, logn);
    memcpy(g11, tmp, n * sizeof * tmp);
    memcpy(tmp, g01, n * sizeof * g01);
    memcpy(g01, g00, hn * sizeof * g00);
    memcpy(g01 + hn, g11, hn * sizeof * g00);

    /*
     * The half-size Gram matrices for the recursive LDL tree
     * building are now:
     *   - left sub-tree: g00, g00+hn, g01
     *   - right sub-tree: g11, g11+hn, g01+hn
     * l10 is in tmp[].
     */

    /*
     * We split t1 and use the first recursive call on the two
     * halves, using the right sub-tree. The result is merged
     * back into tmp + 2*n.
     */
    z1 = tmp + n;
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(z1, z1 + hn, t1, logn);
    ffSampling_fft_dyntree(samp, samp_ctx, z1, z1 + hn,
                           g11, g11 + hn, g01 + hn, logn - 1, z1 + n);
    PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(tmp + (n << 1), z1, z1 + hn, logn);

    /*
     * Compute tb0 = t0 + (t1 - z1) * l10.
     * At that point, l10 is in tmp, t1 is unmodified, and z1 is
     * in tmp + (n << 1). The buffer in z1 is free.
     *
     * In the end, z1 is written over t1, and tb0 is in t0.
     */
    memcpy(z1, t1, n * sizeof * t1);
    PQCLEAN_FALCON1024_CLEAN_poly_sub(z1, tmp + (n << 1), logn);
    memcpy(t1, tmp + (n << 1), n * sizeof * tmp);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(tmp, z1, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(t0, tmp, logn);

    /*
     * Second recursive invocation, on the split tb0 (currently in t0)
     * and the left sub-tree.
     */
    z0 = tmp;
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(z0, z0 + hn, t0, logn);
    ffSampling_fft_dyntree(samp, samp_ctx, z0, z0 + hn,
                           g00, g00 + hn, g01, logn - 1, z0 + n);
    PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(t0, z0, z0 + hn, logn);
}

/*
 * Perform Fast Fourier Sampling for target vector t and LDL tree T.
 * tmp[] must have size for at least two polynomials of size 2^logn.
 *
 * The sampled vector is written into (z0,z1); the tree and the target
 * (t0,t1) are only read. The cases logn == 2 and logn == 1 are handled
 * by dedicated inlined code; larger values recurse on the two
 * sub-trees.
 */
static void
ffSampling_fft(samplerZ samp, void *samp_ctx,
               fpr *z0, fpr *z1,
               const fpr *tree,
               const fpr *t0, const fpr *t1, unsigned logn,
               fpr *tmp) {
    size_t n, hn;
    const fpr *tree0, *tree1;

    /*
     * When logn == 2, we inline the last two recursion levels.
     */
    if (logn == 2) {
        fpr x0, x1, y0, y1, w0, w1, w2, w3, sigma;
        fpr a_re, a_im, b_re, b_im, c_re, c_im;

        /*
         * With n = 4 the node takes 4 elements and each sub-tree
         * takes ffLDL_treesize(1) = 4 elements.
         */
        tree0 = tree + 4;
        tree1 = tree + 8;

        /*
         * We split t1 into w*, then do the recursive invocation,
         * with output in w*. We finally merge back into z1.
         */
        a_re = t1[0];
        a_im = t1[2];
        b_re = t1[1];
        b_im = t1[3];
        c_re = fpr_add(a_re, b_re);
        c_im = fpr_add(a_im, b_im);
        w0 = fpr_half(c_re);
        w1 = fpr_half(c_im);
        c_re = fpr_sub(a_re, b_re);
        c_im = fpr_sub(a_im, b_im);
        w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8);
        w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8);

        x0 = w2;
        x1 = w3;
        sigma = tree1[3];
        w2 = fpr_of(samp(samp_ctx, x0, sigma));
        w3 = fpr_of(samp(samp_ctx, x1, sigma));
        a_re = fpr_sub(x0, w2);
        a_im = fpr_sub(x1, w3);
        b_re = tree1[0];
        b_im = tree1[1];
        c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
        c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
        x0 = fpr_add(c_re, w0);
        x1 = fpr_add(c_im, w1);
        sigma = tree1[2];
        w0 = fpr_of(samp(samp_ctx, x0, sigma));
        w1 = fpr_of(samp(samp_ctx, x1, sigma));

        a_re = w0;
        a_im = w1;
        b_re = w2;
        b_im = w3;
        c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2);
        c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2);
        z1[0] = w0 = fpr_add(a_re, c_re);
        z1[2] = w2 = fpr_add(a_im, c_im);
        z1[1] = w1 = fpr_sub(a_re, c_re);
        z1[3] = w3 = fpr_sub(a_im, c_im);

        /*
         * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*.
         */
        w0 = fpr_sub(t1[0], w0);
        w1 = fpr_sub(t1[1], w1);
        w2 = fpr_sub(t1[2], w2);
        w3 = fpr_sub(t1[3], w3);

        a_re = w0;
        a_im = w2;
        b_re = tree[0];
        b_im = tree[2];
        w0 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
        w2 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
        a_re = w1;
        a_im = w3;
        b_re = tree[1];
        b_im = tree[3];
        w1 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
        w3 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));

        w0 = fpr_add(w0, t0[0]);
        w1 = fpr_add(w1, t0[1]);
        w2 = fpr_add(w2, t0[2]);
        w3 = fpr_add(w3, t0[3]);

        /*
         * Second recursive invocation.
         */
        a_re = w0;
        a_im = w2;
        b_re = w1;
        b_im = w3;
        c_re = fpr_add(a_re, b_re);
        c_im = fpr_add(a_im, b_im);
        w0 = fpr_half(c_re);
        w1 = fpr_half(c_im);
        c_re = fpr_sub(a_re, b_re);
        c_im = fpr_sub(a_im, b_im);
        w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8);
        w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8);

        x0 = w2;
        x1 = w3;
        sigma = tree0[3];
        w2 = y0 = fpr_of(samp(samp_ctx, x0, sigma));
        w3 = y1 = fpr_of(samp(samp_ctx, x1, sigma));
        a_re = fpr_sub(x0, y0);
        a_im = fpr_sub(x1, y1);
        b_re = tree0[0];
        b_im = tree0[1];
        c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
        c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
        x0 = fpr_add(c_re, w0);
        x1 = fpr_add(c_im, w1);
        sigma = tree0[2];
        w0 = fpr_of(samp(samp_ctx, x0, sigma));
        w1 = fpr_of(samp(samp_ctx, x1, sigma));

        a_re = w0;
        a_im = w1;
        b_re = w2;
        b_im = w3;
        c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2);
        c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2);
        z0[0] = fpr_add(a_re, c_re);
        z0[2] = fpr_add(a_im, c_im);
        z0[1] = fpr_sub(a_re, c_re);
        z0[3] = fpr_sub(a_im, c_im);

        return;
    }

    /*
     * Case logn == 1 is reachable only when using Falcon-2 (the
     * smallest size for which Falcon is mathematically defined, but
     * of course way too insecure to be of any use).
     */
    if (logn == 1) {
        fpr x0, x1, y0, y1, sigma;
        fpr a_re, a_im, b_re, b_im, c_re, c_im;

        x0 = t1[0];
        x1 = t1[1];
        sigma = tree[3];
        z1[0] = y0 = fpr_of(samp(samp_ctx, x0, sigma));
        z1[1] = y1 = fpr_of(samp(samp_ctx, x1, sigma));
        a_re = fpr_sub(x0, y0);
        a_im = fpr_sub(x1, y1);
        b_re = tree[0];
        b_im = tree[1];
        c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
        c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
        x0 = fpr_add(c_re, t0[0]);
        x1 = fpr_add(c_im, t0[1]);
        sigma = tree[2];
        z0[0] = fpr_of(samp(samp_ctx, x0, sigma));
        z0[1] = fpr_of(samp(samp_ctx, x1, sigma));

        return;
    }

    /*
     * Normal end of recursion is for logn == 0. Since the last
     * steps of the recursions were inlined in the blocks above
     * (when logn == 1 or 2), this case is not reachable, and is
     * retained here only for documentation purposes.

    if (logn == 0) {
        fpr x0, x1, sigma;

        x0 = t0[0];
        x1 = t1[0];
        sigma = tree[0];
        z0[0] = fpr_of(samp(samp_ctx, x0, sigma));
        z1[0] = fpr_of(samp(samp_ctx, x1, sigma));
        return;
    }

     */

    /*
     * General recursive case (logn >= 3).
     */

    n = (size_t)1 << logn;
    hn = n >> 1;
    tree0 = tree + n;
    tree1 = tree + n + ffLDL_treesize(logn - 1);

    /*
     * We split t1 into z1 (reused as temporary storage), then do
     * the recursive invocation, with output in tmp. We finally
     * merge back into z1.
     */
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(z1, z1 + hn, t1, logn);
    ffSampling_fft(samp, samp_ctx, tmp, tmp + hn,
                   tree1, z1, z1 + hn, logn - 1, tmp + n);
    PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(z1, tmp, tmp + hn, logn);

    /*
     * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in tmp[].
     */
    memcpy(tmp, t1, n * sizeof * t1);
    PQCLEAN_FALCON1024_CLEAN_poly_sub(tmp, z1, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(tmp, tree, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(tmp, t0, logn);

    /*
     * Second recursive invocation.
     */
    PQCLEAN_FALCON1024_CLEAN_poly_split_fft(z0, z0 + hn, tmp, logn);
    ffSampling_fft(samp, samp_ctx, tmp, tmp + hn,
                   tree0, z0, z0 + hn, logn - 1, tmp + n);
    PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(z0, tmp, tmp + hn, logn);
}

/*
 * Compute a signature: the signature contains two vectors, s1 and s2.
 * The s1 vector is not returned. The squared norm of (s1,s2) is
 * computed, and if it is short enough, then s2 is returned into the
 * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is
 * returned; the caller should then try again. This function uses an
 * expanded key.
 *
 * tmp[] must have room for at least six polynomials.
 *
 * On success, the s1 values (as int16_t) are also copied to the start
 * of tmp[].
 */
static int
do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
             const fpr *expanded_key,
             const uint16_t *hm,
             unsigned logn, fpr *tmp) {
    size_t n, u;
    fpr *t0, *t1, *tx, *ty;
    const fpr *b00, *b01, *b10, *b11, *tree;
    fpr ni;
    uint32_t sqn, ng;
    int16_t *s1tmp, *s2tmp;

    n = MKN(logn);
    t0 = tmp;
    t1 = t0 + n;
    b00 = expanded_key + skoff_b00(logn);
    b01 = expanded_key + skoff_b01(logn);
    b10 = expanded_key + skoff_b10(logn);
    b11 = expanded_key + skoff_b11(logn);
    tree = expanded_key + skoff_tree(logn);

    /*
     * Set the target vector to [hm, 0] (hm is the hashed message).
     */
    for (u = 0; u < n; u ++) {
        t0[u] = fpr_of(hm[u]);
        /* This is implicit.
        t1[u] = fpr_zero;
        */
    }

    /*
     * Apply the lattice basis to obtain the real target
     * vector (after normalization with regards to modulus).
     */
    PQCLEAN_FALCON1024_CLEAN_FFT(t0, logn);
    ni = fpr_inverse_of_q;
    memcpy(t1, t0, n * sizeof * t0);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(t1, b01, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mulconst(t1, fpr_neg(ni), logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(t0, b11, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mulconst(t0, ni, logn);

    tx = t1 + n;
    ty = tx + n;

    /*
     * Apply sampling. Output is written back in [tx, ty].
     */
    ffSampling_fft(samp, samp_ctx, tx, ty, tree, t0, t1, logn, ty + n);

    /*
     * Get the lattice point corresponding to that tiny vector.
     */
    memcpy(t0, tx, n * sizeof * tx);
    memcpy(t1, ty, n * sizeof * ty);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(tx, b00, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(ty, b10, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(tx, ty, logn);
    memcpy(ty, t0, n * sizeof * t0);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(ty, b01, logn);

    memcpy(t0, tx, n * sizeof * tx);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(t1, b11, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(t1, ty, logn);

    PQCLEAN_FALCON1024_CLEAN_iFFT(t0, logn);
    PQCLEAN_FALCON1024_CLEAN_iFFT(t1, logn);

    /*
     * Compute the signature.
     *
     * sqn accumulates the squared norm of s1; ng records (in its
     * top bit) whether any partial sum reached 2^31, in which case
     * sqn is saturated to 0xFFFFFFFF after the loop.
     */
    s1tmp = (int16_t *)tx;
    sqn = 0;
    ng = 0;
    for (u = 0; u < n; u ++) {
        int32_t z;

        z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
        sqn += (uint32_t)(z * z);
        ng |= sqn;
        s1tmp[u] = (int16_t)z;
    }
    sqn |= -(ng >> 31);

    /*
     * With "normal" degrees (e.g. 512 or 1024), it is very
     * improbable that the computed vector is not short enough;
     * however, it may happen in practice for the very reduced
     * versions (e.g. degree 16 or below). In that case, the caller
     * will loop, and we must not write anything into s2[] because
     * s2[] may overlap with the hashed message hm[] and we need
     * hm[] for the next iteration.
     */
    s2tmp = (int16_t *)tmp;
    for (u = 0; u < n; u ++) {
        s2tmp[u] = (int16_t) - fpr_rint(t1[u]);
    }
    if (PQCLEAN_FALCON1024_CLEAN_is_short_half(sqn, s2tmp, logn)) {
        memcpy(s2, s2tmp, n * sizeof * s2);
        memcpy(tmp, s1tmp, n * sizeof * s1tmp);
        return 1;
    }
    return 0;
}

/*
 * Compute a signature: the signature contains two vectors, s1 and s2.
 * The s1 vector is not returned.
 * The squared norm of (s1,s2) is
 * computed, and if it is short enough, then s2 is returned into the
 * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is
 * returned; the caller should then try again.
 *
 * tmp[] must have room for at least nine polynomials.
 *
 * This variant takes the raw private key (f, g, F, G) and recomputes
 * the basis and the Gram matrix on each call. On success, the s1
 * values (as int16_t) are also copied to the start of tmp[].
 */
static int
do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
            const int8_t *f, const int8_t *g,
            const int8_t *F, const int8_t *G,
            const uint16_t *hm, unsigned logn, fpr *tmp) {
    size_t n, u;
    fpr *t0, *t1, *tx, *ty;
    fpr *b00, *b01, *b10, *b11, *g00, *g01, *g11;
    fpr ni;
    uint32_t sqn, ng;
    int16_t *s1tmp, *s2tmp;

    n = MKN(logn);

    /*
     * Lattice basis is B = [[g, -f], [G, -F]]. We convert it to FFT.
     */
    b00 = tmp;
    b01 = b00 + n;
    b10 = b01 + n;
    b11 = b10 + n;
    smallints_to_fpr(b01, f, logn);
    smallints_to_fpr(b00, g, logn);
    smallints_to_fpr(b11, F, logn);
    smallints_to_fpr(b10, G, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b01, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b00, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b11, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b10, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_neg(b01, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_neg(b11, logn);

    /*
     * Compute the Gram matrix G = B·B*. Formulas are:
     *   g00 = b00*adj(b00) + b01*adj(b01)
     *   g01 = b00*adj(b10) + b01*adj(b11)
     *   g10 = b10*adj(b00) + b11*adj(b01)
     *   g11 = b10*adj(b10) + b11*adj(b11)
     *
     * For historical reasons, this implementation uses
     * g00, g01 and g11 (upper triangle). g10 is not kept
     * since it is equal to adj(g01).
     *
     * We _replace_ the matrix B with the Gram matrix, but we
     * must keep b01 and b11 for computing the target vector.
     */
    t0 = b11 + n;
    t1 = t0 + n;

    memcpy(t0, b01, n * sizeof * b01);
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(t0, logn);    // t0 <- b01*adj(b01)

    memcpy(t1, b00, n * sizeof * b00);
    PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(t1, b10, logn);   // t1 <- b00*adj(b10)
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(b00, logn);   // b00 <- b00*adj(b00)
    PQCLEAN_FALCON1024_CLEAN_poly_add(b00, t0, logn);      // b00 <- g00
    memcpy(t0, b01, n * sizeof * b01);
    PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(b01, b11, logn);  // b01 <- b01*adj(b11)
    PQCLEAN_FALCON1024_CLEAN_poly_add(b01, t1, logn);      // b01 <- g01

    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(b10, logn);   // b10 <- b10*adj(b10)
    memcpy(t1, b11, n * sizeof * b11);
    PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(t1, logn);    // t1 <- b11*adj(b11)
    PQCLEAN_FALCON1024_CLEAN_poly_add(b10, t1, logn);      // b10 <- g11

    /*
     * We rename variables to make things clearer. The three elements
     * of the Gram matrix uses the first 3*n slots of tmp[], followed
     * by b11 and b01 (in that order).
     */
    g00 = b00;
    g01 = b01;
    g11 = b10;
    b01 = t0;
    t0 = b01 + n;
    t1 = t0 + n;

    /*
     * Memory layout at that point:
     *   g00 g01 g11 b11 b01 t0 t1
     */

    /*
     * Set the target vector to [hm, 0] (hm is the hashed message).
     */
    for (u = 0; u < n; u ++) {
        t0[u] = fpr_of(hm[u]);
        /* This is implicit.
        t1[u] = fpr_zero;
        */
    }

    /*
     * Apply the lattice basis to obtain the real target
     * vector (after normalization with regards to modulus).
     */
    PQCLEAN_FALCON1024_CLEAN_FFT(t0, logn);
    ni = fpr_inverse_of_q;
    memcpy(t1, t0, n * sizeof * t0);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(t1, b01, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mulconst(t1, fpr_neg(ni), logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(t0, b11, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mulconst(t0, ni, logn);

    /*
     * b01 and b11 can be discarded, so we move back (t0,t1).
     * Memory layout is now:
     *   g00 g01 g11 t0 t1
     */
    memcpy(b11, t0, n * 2 * sizeof * t0);
    t0 = g11 + n;
    t1 = t0 + n;

    /*
     * Apply sampling; result is written over (t0,t1).
     */
    ffSampling_fft_dyntree(samp, samp_ctx,
                           t0, t1, g00, g01, g11, logn, t1 + n);

    /*
     * We arrange the layout back to:
     *   b00 b01 b10 b11 t0 t1
     *
     * We did not conserve the matrix basis, so we must recompute
     * it now.
     */
    b00 = tmp;
    b01 = b00 + n;
    b10 = b01 + n;
    b11 = b10 + n;
    /* Source and destination overlap here, hence memmove(). */
    memmove(b11 + n, t0, n * 2 * sizeof * t0);
    t0 = b11 + n;
    t1 = t0 + n;
    smallints_to_fpr(b01, f, logn);
    smallints_to_fpr(b00, g, logn);
    smallints_to_fpr(b11, F, logn);
    smallints_to_fpr(b10, G, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b01, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b00, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b11, logn);
    PQCLEAN_FALCON1024_CLEAN_FFT(b10, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_neg(b01, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_neg(b11, logn);
    tx = t1 + n;
    ty = tx + n;

    /*
     * Get the lattice point corresponding to that tiny vector.
     */
    memcpy(tx, t0, n * sizeof * t0);
    memcpy(ty, t1, n * sizeof * t1);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(tx, b00, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(ty, b10, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(tx, ty, logn);
    memcpy(ty, t0, n * sizeof * t0);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(ty, b01, logn);

    memcpy(t0, tx, n * sizeof * tx);
    PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(t1, b11, logn);
    PQCLEAN_FALCON1024_CLEAN_poly_add(t1, ty, logn);
    PQCLEAN_FALCON1024_CLEAN_iFFT(t0, logn);
    PQCLEAN_FALCON1024_CLEAN_iFFT(t1, logn);

    /*
     * sqn accumulates the squared norm of s1; ng records (in its
     * top bit) whether any partial sum reached 2^31, in which case
     * sqn is saturated to 0xFFFFFFFF after the loop.
     */
    s1tmp = (int16_t *)tx;
    sqn = 0;
    ng = 0;
    for (u = 0; u < n; u ++) {
        int32_t z;

        z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
        sqn += (uint32_t)(z * z);
        ng |= sqn;
        s1tmp[u] = (int16_t)z;
    }
    sqn |= -(ng >> 31);

    /*
     * With "normal" degrees (e.g. 512 or 1024), it is very
     * improbable that the computed vector is not short enough;
     * however, it may happen in practice for the very reduced
     * versions (e.g. degree 16 or below). In that case, the caller
     * will loop, and we must not write anything into s2[] because
     * s2[] may overlap with the hashed message hm[] and we need
     * hm[] for the next iteration.
     */
    s2tmp = (int16_t *)tmp;
    for (u = 0; u < n; u ++) {
        s2tmp[u] = (int16_t) - fpr_rint(t1[u]);
    }
    if (PQCLEAN_FALCON1024_CLEAN_is_short_half(sqn, s2tmp, logn)) {
        memcpy(s2, s2tmp, n * sizeof * s2);
        memcpy(tmp, s1tmp, n * sizeof * s1tmp);
        return 1;
    }
    return 0;
}

/*
 * Sample an integer value along a half-gaussian distribution centered
 * on zero and standard deviation 1.8205, with a precision of 72 bits.
 *
 * The returned value is the number of table entries that are strictly
 * greater than a fresh 72-bit random value; all entries are always
 * scanned, with no data-dependent branch.
 */
int
PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(prng *p) {

    /*
     * Each row is one 72-bit threshold, stored as three 24-bit
     * limbs, most significant limb first.
     */
    static const uint32_t dist[] = {
        10745844u,  3068844u,  3741698u,
        5559083u,  1580863u,  8248194u,
        2260429u, 13669192u,  2736639u,
        708981u,  4421575u, 10046180u,
        169348u,  7122675u,  4136815u,
        30538u, 13063405u,  7650655u,
        4132u, 14505003u,  7826148u,
        417u, 16768101u, 11363290u,
        31u,  8444042u,  8086568u,
        1u, 12844466u,   265321u,
        0u,  1232676u, 13644283u,
        0u,    38047u,  9111839u,
        0u,      870u,  6138264u,
        0u,       14u, 12545723u,
        0u,        0u,  3104126u,
        0u,        0u,    28824u,
        0u,        0u,      198u,
        0u,        0u,        1u
    };

    uint32_t v0, v1, v2, hi;
    uint64_t lo;
    size_t u;
    int z;

    /*
     * Get a random 72-bit value, into three 24-bit limbs v0..v2.
     */
    lo = prng_get_u64(p);
    hi = prng_get_u8(p);
    v0 = (uint32_t)lo & 0xFFFFFF;
    v1 = (uint32_t)(lo >> 24) & 0xFFFFFF;
    v2 = (uint32_t)(lo >> 48) | (hi << 16);

    /*
     * Sampled value is z, such that v0..v2 is lower than the first
     * z elements of the table.
     */
    z = 0;
    for (u = 0; u < (sizeof dist) / sizeof(dist[0]); u += 3) {
        uint32_t w0, w1, w2, cc;

        w0 = dist[u + 2];
        w1 = dist[u + 1];
        w2 = dist[u + 0];
        /*
         * Multi-limb subtraction v - w; cc is the borrow, so
         * the final cc is 1 exactly when v < w.
         */
        cc = (v0 - w0) >> 31;
        cc = (v1 - w1 - cc) >> 31;
        cc = (v2 - w2 - cc) >> 31;
        z += (int)cc;
    }
    return z;

}

/*
 * Sample a bit with probability exp(-x) for some x >= 0.
 *
 * The ccs parameter is passed through to fpr_expm_p63().
 * NOTE(review): presumably fpr_expm_p63(r, ccs) returns
 * ccs*exp(-r) scaled by 2^63 -- confirm against its definition.
 */
static int
BerExp(prng *p, fpr x, fpr ccs) {
    int s, i;
    fpr r;
    uint32_t sw, w;
    uint64_t z;

    /*
     * Reduce x modulo log(2): x = s*log(2) + r, with s an integer,
     * and 0 <= r < log(2). Since x >= 0, we can use fpr_trunc().
     */
    s = (int)fpr_trunc(fpr_mul(x, fpr_inv_log2));
    r = fpr_sub(x, fpr_mul(fpr_of(s), fpr_log2));

    /*
     * It may happen (quite rarely) that s >= 64; if sigma = 1.2
     * (the minimum value for sigma), r = 0 and b = 1, then we get
     * s >= 64 if the half-Gaussian produced a z >= 13, which happens
     * with probability about 0.000000000230383991, which is
     * approximately equal to 2^(-32). In any case, if s >= 64,
     * then BerExp will be non-zero with probability less than
     * 2^(-64), so we can simply saturate s at 63.
     *
     * The saturation is done without a branch: the mask is all-ones
     * exactly when 63 - sw wraps around (i.e. sw > 63).
     */
    sw = (uint32_t)s;
    sw ^= (sw ^ 63) & -((63 - sw) >> 31);
    s = (int)sw;

    /*
     * Compute exp(-r); we know that 0 <= r < log(2) at this point, so
     * we can use fpr_expm_p63(), which yields a result scaled to 2^63.
     * We scale it up to 2^64, then right-shift it by s bits because
     * we really want exp(-x) = 2^(-s)*exp(-r).
     *
     * The "-1" operation makes sure that the value fits on 64 bits
     * (i.e. if r = 0, we may get 2^64, and we prefer 2^64-1 in that
     * case). The bias is negligible since fpr_expm_p63() only computes
     * with 51 bits of precision or so.
     */
    z = ((fpr_expm_p63(r, ccs) << 1) - 1) >> s;

    /*
     * Sample a bit with probability exp(-x). Since x = s*log(2) + r,
     * exp(-x) = 2^-s * exp(-r), we compare lazily exp(-x) with the
     * PRNG output to limit its consumption, the sign of the difference
     * yields the expected result.
     *
     * The comparison proceeds byte by byte, from the most significant
     * byte of z, and stops at the first byte that differs.
     */
    i = 64;
    do {
        i -= 8;
        w = prng_get_u8(p) - ((uint32_t)(z >> i) & 0xFF);
    } while (!w && i > 0);
    return (int)(w >> 31);
}

/*
 * The sampler produces a random integer that follows a discrete Gaussian
 * distribution, centered on mu, and with standard deviation sigma. The
 * provided parameter isigma is equal to 1/sigma.
 *
 * The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between
 * 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9.
 *
 * ctx must point to a sampler_context; its PRNG (p) and its sigma_min
 * field are used.
 */
int
PQCLEAN_FALCON1024_CLEAN_sampler(void *ctx, fpr mu, fpr isigma) {
    sampler_context *spc;
    int s, z0, z, b;
    fpr r, dss, ccs, x;

    spc = ctx;

    /*
     * Center is mu. We compute mu = s + r where s is an integer
     * and 0 <= r < 1.
     */
    s = (int)fpr_floor(mu);
    r = fpr_sub(mu, fpr_of(s));

    /*
     * dss = 1/(2*sigma^2) = 0.5*(isigma^2).
     */
    dss = fpr_half(fpr_sqr(isigma));

    /*
     * ccs = sigma_min / sigma = sigma_min * isigma.
     */
    ccs = fpr_mul(isigma, spc->sigma_min);

    /*
     * We now need to sample on center r.
     */
    for (;;) {
        /*
         * Sample z for a Gaussian distribution. Then get a
         * random bit b to turn the sampling into a bimodal
         * distribution: if b = 1, we use z+1, otherwise we
         * use -z. We thus have two situations:
         *
         *  - b = 1: z >= 1 and sampled against a Gaussian
         *    centered on 1.
         *  - b = 0: z <= 0 and sampled against a Gaussian
         *    centered on 0.
         */
        z0 = PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(&spc->p);
        b = (int)prng_get_u8(&spc->p) & 1;
        z = b + ((b << 1) - 1) * z0;

        /*
         * Rejection sampling. We want a Gaussian centered on r;
         * but we sampled against a Gaussian centered on b (0 or
         * 1). But we know that z is always in the range where
         * our sampling distribution is greater than the Gaussian
         * distribution, so rejection works.
         *
         * We got z with distribution:
         *    G(z) = exp(-((z-b)^2)/(2*sigma0^2))
         * We target distribution:
         *    S(z) = exp(-((z-r)^2)/(2*sigma^2))
         * Rejection sampling works by keeping the value z with
         * probability S(z)/G(z), and starting again otherwise.
         * This requires S(z) <= G(z), which is the case here.
         * Thus, we simply need to keep our z with probability:
         *    P = exp(-x)
         * where:
         *    x = ((z-r)^2)/(2*sigma^2) - ((z-b)^2)/(2*sigma0^2)
         *
         * Here, we scale up the Bernoulli distribution, which
         * makes rejection more probable, but makes rejection
         * rate sufficiently decorrelated from the Gaussian
         * center and standard deviation that the whole sampler
         * can be said to be constant-time.
         */
        x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z), r)), dss);
        x = fpr_sub(x, fpr_mul(fpr_of(z0 * z0), fpr_inv_2sqrsigma0));
        if (BerExp(&spc->p, x, ccs)) {
            /*
             * Rejection sampling was centered on r, but the
             * actual center is mu = s + r.
             */
            return s + z;
        }
    }
}

/* see inner.h */
/*
 * Sign the hashed message hm[] with an expanded key; the s2 vector of
 * the signature is written into sig[]. The function loops until
 * do_sign_tree() reports an acceptable (short enough) signature.
 * tmp[] is used as an fpr scratch array (see do_sign_tree() for the
 * required size).
 */
void
PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
                                   const fpr *expanded_key,
                                   const uint16_t *hm, unsigned logn, uint8_t *tmp) {
    fpr *ftmp;

    ftmp = (fpr *)tmp;
    for (;;) {
        /*
         * Signature produces short vectors s1 and s2. The
         * signature is acceptable only if the aggregate vector
         * s1,s2 is short; we must use the same bound as the
         * verifier.
         *
         * If the signature is acceptable, then we return only s2
         * (the verifier recomputes s1 from s2, the hashed message,
         * and the public key).
         */
        sampler_context spc;
        samplerZ samp;
        void *samp_ctx;

        /*
         * Normal sampling. We use a fast PRNG seeded from our
         * SHAKE context ('rng').
         *
         * The PRNG is re-initialized from 'rng' on every
         * iteration of the loop.
         */
        if (logn == 10) {
            spc.sigma_min = fpr_sigma_min_10;
        } else {
            spc.sigma_min = fpr_sigma_min_9;
        }
        PQCLEAN_FALCON1024_CLEAN_prng_init(&spc.p, rng);
        samp = PQCLEAN_FALCON1024_CLEAN_sampler;
        samp_ctx = &spc;

        /*
         * Do the actual signature.
         */
        if (do_sign_tree(samp, samp_ctx, sig,
                         expanded_key, hm, logn, ftmp)) {
            break;
        }
    }
}

/* see inner.h */
/*
 * Sign the hashed message hm[] with the raw private key (f, g, F, G);
 * the s2 vector of the signature is written into sig[]. The function
 * loops until do_sign_dyn() reports an acceptable (short enough)
 * signature. tmp[] is used as an fpr scratch array (see do_sign_dyn()
 * for the required size).
 */
void
PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
                                  const int8_t *f, const int8_t *g,
                                  const int8_t *F, const int8_t *G,
                                  const uint16_t *hm, unsigned logn, uint8_t *tmp) {
    fpr *ftmp;

    ftmp = (fpr *)tmp;
    for (;;) {
        /*
         * Signature produces short vectors s1 and s2. The
         * signature is acceptable only if the aggregate vector
         * s1,s2 is short; we must use the same bound as the
         * verifier.
         *
         * If the signature is acceptable, then we return only s2
         * (the verifier recomputes s1 from s2, the hashed message,
         * and the public key).
         */
        sampler_context spc;
        samplerZ samp;
        void *samp_ctx;

        /*
         * Normal sampling. We use a fast PRNG seeded from our
         * SHAKE context ('rng').
         *
         * The PRNG is re-initialized from 'rng' on every
         * iteration of the loop.
         */
        if (logn == 10) {
            spc.sigma_min = fpr_sigma_min_10;
        } else {
            spc.sigma_min = fpr_sigma_min_9;
        }
        PQCLEAN_FALCON1024_CLEAN_prng_init(&spc.p, rng);
        samp = PQCLEAN_FALCON1024_CLEAN_sampler;
        samp_ctx = &spc;

        /*
         * Do the actual signature.
         */
        if (do_sign_dyn(samp, samp_ctx, sig,
                        f, g, F, G, hm, logn, ftmp)) {
            break;
        }
    }
}
diff --git a/crypto_sign/falcon/falcon-1024/clean/vrfy.c b/crypto_sign/falcon/falcon-1024/clean/vrfy.c
new file mode 100644
index 00000000..93f2d526
--- /dev/null
+++ b/crypto_sign/falcon/falcon-1024/clean/vrfy.c
@@ -0,0 +1,853 @@
#include "inner.h"

/*
 * Falcon signature verification.
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ===================================================================== */ +/* + * Constants for NTT. + * + * n = 2^logn (2 <= n <= 1024) + * phi = X^n + 1 + * q = 12289 + * q0i = -1/q mod 2^16 + * R = 2^16 mod q + * R2 = 2^32 mod q + */ + +#define Q 12289 +#define Q0I 12287 +#define R 4091 +#define R2 10952 + +/* + * Table for NTT, binary case: + * GMb[x] = R*(g^rev(x)) mod q + * where g = 7 (it is a 2048-th primitive root of 1 modulo q) + * and rev() is the bit-reversal function over 10 bits. 
+ */ +static const uint16_t GMb[] = { + 4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, + 1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, + 1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, + 10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, + 12210, 6240, 997, 117, 4783, 4407, 1549, 7072, + 2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, + 12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, + 2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, + 5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, + 1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, + 3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, + 9277, 6130, 3323, 883, 10469, 489, 1502, 2851, + 11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, + 730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, + 9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, + 7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, + 3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, + 10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, + 6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, + 8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, + 6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, + 8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, + 5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, + 1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, + 737, 3698, 4699, 5753, 9046, 3687, 16, 914, + 5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, + 10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, + 6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, + 932, 10229, 8927, 7642, 351, 9298, 237, 5858, + 7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, + 4602, 1748, 11300, 340, 3711, 4614, 300, 10993, + 5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, + 3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, + 4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, + 6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, + 6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, + 8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, + 3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, + 7389, 8879, 10661, 11457, 4220, 
1432, 10832, 4328, + 8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, + 2523, 4339, 6115, 619, 937, 2834, 7775, 3279, + 2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, + 2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, + 12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, + 1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, + 11192, 315, 4511, 1158, 6061, 6751, 11865, 357, + 7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, + 4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, + 3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, + 5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, + 4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, + 7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, + 8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, + 6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, + 7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, + 10438, 9471, 1271, 408, 6911, 3079, 360, 8276, + 11535, 9156, 9049, 11539, 850, 8617, 784, 7919, + 8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, + 9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, + 6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, + 8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, + 8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, + 4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, + 2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, + 4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, + 11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, + 7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, + 11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, + 11736, 6813, 6979, 819, 8903, 6271, 10843, 348, + 7514, 8339, 6439, 694, 852, 5659, 2781, 3716, + 11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, + 2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, + 2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, + 10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, + 10923, 4918, 128, 7312, 725, 9157, 5006, 6393, + 3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, + 3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, + 5110, 45, 2400, 1921, 4377, 2720, 1695, 51, + 
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, + 4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, + 482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, + 11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, + 11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, + 8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, + 11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, + 862, 3158, 477, 7279, 5678, 7914, 4254, 302, + 2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, + 10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, + 5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, + 11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, + 1397, 10678, 103, 7420, 7976, 936, 764, 632, + 7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, + 6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, + 4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, + 7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, + 10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, + 2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, + 4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, + 8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, + 8192, 986, 7527, 1401, 870, 3615, 8465, 2756, + 9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, + 11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, + 2567, 708, 893, 6465, 4962, 10024, 2090, 5718, + 10743, 780, 4733, 4623, 2134, 2087, 4802, 884, + 5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, + 4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, + 6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, + 11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, + 8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, + 4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, + 2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, + 5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, + 11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, + 3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, + 7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, + 6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, + 10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, + 5371, 7274, 
11084, 290, 7864, 6827, 11822, 2509, + 6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, + 11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, + 7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, + 9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, + 7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, + 10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, + 162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, + 8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, + 5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, + 6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 +}; + +/* + * Table for inverse NTT, binary case: + * iGMb[x] = R*((1/g)^rev(x)) mod q + * Since g = 7, 1/g = 8778 mod 12289. + */ +static const uint16_t iGMb[] = { + 4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, + 2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, + 3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, + 5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, + 8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, + 4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, + 8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, + 5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, + 13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, + 9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, + 7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, + 11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, + 9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, + 6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, + 5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, + 4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, + 6635, 6543, 1582, 4868, 42, 673, 2240, 7219, + 1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, + 7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, + 6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, + 12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, + 6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, + 6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, + 11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, + 6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, + 9130, 9948, 
3566, 2133, 3901, 6038, 7333, 6609, + 3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, + 3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, + 5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, + 4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, + 10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, + 3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, + 5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, + 2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, + 3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, + 7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, + 10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, + 7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, + 6689, 386, 4462, 105, 2076, 10443, 119, 3955, + 4370, 11505, 3672, 11439, 750, 3240, 3133, 754, + 4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, + 4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, + 10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, + 728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, + 945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, + 4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, + 1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, + 101, 1911, 9483, 3608, 11997, 10536, 812, 8915, + 637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, + 3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, + 11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, + 2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, + 7769, 136, 617, 3157, 5889, 9219, 6855, 120, + 4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, + 11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, + 9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, + 6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, + 7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, + 10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, + 9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, + 4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, + 3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, + 3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, + 1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, + 2340, 7651, 1910, 10709, 2117, 9637, 
6402, 6028, + 2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, + 9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, + 6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, + 2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, + 5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, + 9956, 2702, 6656, 735, 2243, 11656, 833, 3107, + 6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, + 3513, 9769, 3025, 779, 9433, 3392, 7437, 668, + 10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, + 9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, + 5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, + 6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, + 7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, + 9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, + 707, 1088, 4936, 678, 10245, 18, 5684, 960, + 4459, 7957, 226, 2451, 6, 8874, 320, 6298, + 8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, + 9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, + 7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, + 5227, 952, 4319, 9810, 4356, 3088, 11118, 840, + 6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, + 4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, + 1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, + 11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, + 6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, + 2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, + 7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, + 9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, + 2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, + 12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, + 7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, + 9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, + 9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, + 439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, + 8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, + 10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, + 11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, + 11489, 8833, 2393, 15, 10830, 5003, 17, 565, + 5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, + 5840, 10482, 
8338, 6035, 6964, 1574, 10936, 2020, + 2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, + 11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, + 104, 6348, 9643, 6757, 12110, 5617, 10935, 541, + 135, 3041, 7200, 6526, 5085, 12136, 842, 4129, + 7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, + 1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, + 1770, 273, 8377, 2271, 5225, 10283, 116, 11807, + 91, 11699, 757, 1304, 7524, 6451, 8032, 8154, + 7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, + 12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, + 3924, 3188, 367, 2077, 336, 5384, 5631, 8596, + 4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, + 5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, + 12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, + 10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, + 9763, 12191, 459, 2966, 3166, 405, 5000, 9311, + 6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, + 8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, + 11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, + 9474, 2586, 1431, 2741, 473, 11383, 4745, 836, + 4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, + 5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, + 5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 +}; + +/* + * Reduce a small signed integer modulo q. The source integer MUST + * be between -q/2 and +q/2. + */ +static inline uint32_t +mq_conv_small(int x) { + /* + * If x < 0, the cast to uint32_t will set the high bit to 1. + */ + uint32_t y; + + y = (uint32_t)x; + y += Q & -(y >> 31); + return y; +} + +/* + * Addition modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_add(uint32_t x, uint32_t y) { + /* + * We compute x + y - q. If the result is negative, then the + * high bit will be set, and 'd >> 31' will be equal to 1; + * thus '-(d >> 31)' will be an all-one pattern. Otherwise, + * it will be an all-zero pattern. In other words, this + * implements a conditional addition of q. 
+ */ + uint32_t d; + + d = x + y - Q; + d += Q & -(d >> 31); + return d; +} + +/* + * Subtraction modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_sub(uint32_t x, uint32_t y) { + /* + * As in mq_add(), we use a conditional addition to ensure the + * result is in the 0..q-1 range. + */ + uint32_t d; + + d = x - y; + d += Q & -(d >> 31); + return d; +} + +/* + * Division by 2 modulo q. Operand must be in the 0..q-1 range. + * An odd x is first made even by adding q (which is odd), so that + * the right shift is an exact division. + */ +static inline uint32_t +mq_rshift1(uint32_t x) { + x += Q & -(x & 1); + return (x >> 1); +} + +/* + * Montgomery multiplication modulo q. If we set R = 2^16 mod q, then + * this function computes: x * y / R mod q + * Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_montymul(uint32_t x, uint32_t y) { + uint32_t z, w; + + /* + * We compute x*y + k*q with a value of k chosen so that the 16 + * low bits of the result are 0. We can then shift the value. + * After the shift, result may still be larger than q, but it + * will be lower than 2*q, so a conditional subtraction works. + */ + + z = x * y; + w = ((z * Q0I) & 0xFFFF) * Q; + + /* + * When adding z and w, the result will have its low 16 bits + * equal to 0. Since x and y are lower than q, z is at most + * (q - 1)^2; w is at most (2^16 - 1) * q. The sum is thus no + * more than (2^16 - 1) * q + (q - 1)^2 = 956354559, which fits + * on 30 bits and cannot overflow a uint32_t. + */ + z = (z + w) >> 16; + + /* + * After the shift, the value is lower than 2^14, hence lower + * than 2q. We do a subtraction then a conditional addition to + * ensure the result is in the expected range. + */ + z -= Q; + z += Q & -(z >> 31); + return z; +} + +/* + * Montgomery squaring (computes (x^2)/R). + */ +static inline uint32_t +mq_montysqr(uint32_t x) { + return mq_montymul(x, x); +} + +/* + * Divide x by y modulo q = 12289. + */ +static inline uint32_t +mq_div_12289(uint32_t x, uint32_t y) { + /* + * We invert y by computing y^(q-2) mod q.
+ * + * We use the following addition chain for exponent e = 12287: + * + * e0 = 1 + * e1 = 2 * e0 = 2 + * e2 = e1 + e0 = 3 + * e3 = e2 + e1 = 5 + * e4 = 2 * e3 = 10 + * e5 = 2 * e4 = 20 + * e6 = 2 * e5 = 40 + * e7 = 2 * e6 = 80 + * e8 = 2 * e7 = 160 + * e9 = e8 + e2 = 163 + * e10 = e9 + e8 = 323 + * e11 = 2 * e10 = 646 + * e12 = 2 * e11 = 1292 + * e13 = e12 + e9 = 1455 + * e14 = 2 * e13 = 2910 + * e15 = 2 * e14 = 5820 + * e16 = e15 + e10 = 6143 + * e17 = 2 * e16 = 12286 + * e18 = e17 + e0 = 12287 + * + * Additions on exponents are converted to Montgomery + * multiplications. We define all intermediate results as so + * many local variables, and let the C compiler work out which + * must be kept around. + */ + uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; + uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; + + y0 = mq_montymul(y, R2); + y1 = mq_montysqr(y0); + y2 = mq_montymul(y1, y0); + y3 = mq_montymul(y2, y1); + y4 = mq_montysqr(y3); + y5 = mq_montysqr(y4); + y6 = mq_montysqr(y5); + y7 = mq_montysqr(y6); + y8 = mq_montysqr(y7); + y9 = mq_montymul(y8, y2); + y10 = mq_montymul(y9, y8); + y11 = mq_montysqr(y10); + y12 = mq_montysqr(y11); + y13 = mq_montymul(y12, y9); + y14 = mq_montysqr(y13); + y15 = mq_montysqr(y14); + y16 = mq_montymul(y15, y10); + y17 = mq_montysqr(y16); + y18 = mq_montymul(y17, y0); + + /* + * Final multiplication with x, which is not in Montgomery + * representation, computes the correct division result. + */ + return mq_montymul(y18, x); +} + +/* + * Compute NTT on a ring element. 
+ */ +static void +mq_NTT(uint16_t *a, unsigned logn) { /* + * a[] holds n = 2^logn values and is transformed in place. Each + * of the logn passes halves the butterfly span t and doubles the + * group count m; group i of a pass uses the twiddle GMb[m + i]. + */ + size_t n, t, m; + + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, i, j1; + + ht = t >> 1; + for (i = 0, j1 = 0; i < m; i ++, j1 += t) { + size_t j, j2; + uint32_t s; + + s = GMb[m + i]; + j2 = j1 + ht; + for (j = j1; j < j2; j ++) { + uint32_t u, v; + + u = a[j]; + v = mq_montymul(a[j + ht], s); + a[j] = (uint16_t)mq_add(u, v); + a[j + ht] = (uint16_t)mq_sub(u, v); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT on a ring element, binary case. + * + * a[] holds n = 2^logn values and is transformed in place. The + * passes use a span t that doubles and a group count that halves + * (the reverse of mq_NTT()), with twiddles read from iGMb[]; all + * values are finally scaled by 1/n. + */ +static void +mq_iNTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + uint32_t ni; + + n = (size_t)1 << logn; + t = 1; + m = n; + while (m > 1) { + size_t hm, dt, i, j1; + + hm = m >> 1; + dt = t << 1; + for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { + size_t j, j2; + uint32_t s; + + j2 = j1 + t; + s = iGMb[hm + i]; + for (j = j1; j < j2; j ++) { + uint32_t u, v, w; + + u = a[j]; + v = a[j + t]; + a[j] = (uint16_t)mq_add(u, v); + w = mq_sub(u, v); + a[j + t] = (uint16_t) + mq_montymul(w, s); + } + } + t = dt; + m = hm; + } + + /* + * To complete the inverse NTT, we must now divide all values by + * n (the vector size). We thus need the inverse of n, i.e. we + * need to divide 1 by 2 logn times. But we also want it in + * Montgomery representation, i.e. we also want to multiply it + * by R = 2^16. In the common case, this should be a simple right + * shift. The loop below is generic and works also in corner cases; + * its computation time is negligible. + */ + ni = R; + for (m = n; m > 1; m >>= 1) { + ni = mq_rshift1(ni); + } + for (m = 0; m < n; m ++) { + a[m] = (uint16_t)mq_montymul(a[m], ni); + } +} + +/* + * Convert a polynomial (mod q) to Montgomery representation.
+ */ +static void +mq_poly_tomonty(uint16_t *f, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], R2); + } +} + +/* + * Multiply two polynomials together (NTT representation, and using + * a Montgomery multiplication). Result f*g is written over f. + */ +static void +mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], g[u]); + } +} + +/* + * Subtract polynomial g from polynomial f. Result f - g is written + * over f. + */ +static void +mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_sub(f[u], g[u]); + } +} + +/* ===================================================================== */ + +/* see inner.h + * + * Converts the n = 2^logn values of h[] in place: first to NTT + * representation, then to Montgomery representation. + */ +void +PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) { + mq_NTT(h, logn); + mq_poly_tomonty(h, logn); +} + +/* see inner.h + * + * Recomputes -s1 = s2*h - c0 from the signature vector s2, the hashed + * message c0 and the public key h, and returns the verdict of + * PQCLEAN_FALCON1024_CLEAN_is_short() on (-s1, s2). h[] is combined + * through mq_poly_montymul_ntt(), so it is presumably expected in the + * format produced by PQCLEAN_FALCON1024_CLEAN_to_ntt_monty() (to be + * confirmed against the callers). tmp[] is used to hold n 16-bit + * values. + */ +int +PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + + /* + * Reduce s2 elements modulo q ([0..q-1] range). + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + + /* + * Compute -s1 = s2*h - c0 mod phi mod q (in tt[]). + */ + mq_NTT(tt, logn); + mq_poly_montymul_ntt(tt, h, logn); + mq_iNTT(tt, logn); + mq_poly_sub(tt, c0, logn); + + /* + * Normalize -s1 elements into the [-q/2..q/2] range. + */ + for (u = 0; u < n; u ++) { + int32_t w; + + w = (int32_t)tt[u]; + w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); + ((int16_t *)tt)[u] = (int16_t)w; + } + + /* + * Signature is valid if and only if the aggregate (-s1,s2) vector + * is short enough.
+ */ + return PQCLEAN_FALCON1024_CLEAN_is_short((int16_t *)tt, s2, logn); +} + +/* see inner.h + * + * Computes h = g/f mod phi mod q: f and g are reduced modulo q, + * taken to NTT representation and divided coefficient-wise, then + * the quotient is brought back with the inverse NTT. Returns 1 on + * success, or 0 if an NTT coefficient of f is zero (f is then not + * invertible). tmp[] is used to hold n 16-bit values. + */ +int +PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + tt[u] = (uint16_t)mq_conv_small(f[u]); + h[u] = (uint16_t)mq_conv_small(g[u]); + } + mq_NTT(h, logn); + mq_NTT(tt, logn); + for (u = 0; u < n; u ++) { + if (tt[u] == 0) { + return 0; + } + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + return 1; +} + +/* see inner.h + * + * Computes G = g*F/f mod phi mod q in NTT representation, then + * normalizes each coefficient into the signed range around zero. + * Returns 1 on success; returns 0 if an NTT coefficient of f is + * zero, or if a coefficient of G falls outside of -127..+127. + * tmp[] is used to hold 2*n 16-bit values. + */ +int +PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *t1, *t2; + + n = (size_t)1 << logn; + t1 = (uint16_t *)tmp; + t2 = t1 + n; + for (u = 0; u < n; u ++) { + t1[u] = (uint16_t)mq_conv_small(g[u]); + t2[u] = (uint16_t)mq_conv_small(F[u]); + } + mq_NTT(t1, logn); + mq_NTT(t2, logn); + mq_poly_tomonty(t1, logn); + mq_poly_montymul_ntt(t1, t2, logn); + for (u = 0; u < n; u ++) { + t2[u] = (uint16_t)mq_conv_small(f[u]); + } + mq_NTT(t2, logn); + for (u = 0; u < n; u ++) { + if (t2[u] == 0) { + return 0; + } + t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); + } + mq_iNTT(t1, logn); + for (u = 0; u < n; u ++) { + uint32_t w; + int32_t gi; + + w = t1[u]; + w -= (Q & ~ -((w - (Q >> 1)) >> 31)); + gi = *(int32_t *)&w; + if (gi < -127 || gi > +127) { + return 0; + } + G[u] = (int8_t)gi; + } + return 1; +} + +/* see inner.h + * + * Returns 1 if no NTT coefficient of s2 (reduced modulo q) is zero, + * 0 otherwise. tmp[] is used to hold n 16-bit values. + */ +int +PQCLEAN_FALCON1024_CLEAN_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + mq_NTT(tt, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + } + return (int)(1u - 
(r >> 31)); +} + +/* see inner.h + * + * Rebuilds into h[] the value (c0 - s1) / s2 mod phi mod q. The + * result is 1 when no NTT coefficient of s2 is zero and + * PQCLEAN_FALCON1024_CLEAN_is_short() returns 1 for (s1, s2) + * (presumably its "short enough" verdict; it is defined elsewhere); + * it is 0 otherwise. tmp[] is used to hold n 16-bit values. + */ +int +PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + + /* + * Reduce elements of s1 and s2 modulo q; then write s2 into tt[] + * and c0 - s1 into h[]. + */ + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + + w = (uint32_t)s1[u]; + w += Q & -(w >> 31); + w = mq_sub(c0[u], w); + h[u] = (uint16_t)w; + } + + /* + * Compute h = (c0 - s1) / s2. If one of the coefficients of s2 + * is zero (in NTT representation) then the operation fails. We + * keep that information in a flag so that we do not deviate + * from strict constant-time processing; if all coefficients of + * s2 are non-zero, then the high bit of r will be zero. + */ + mq_NTT(tt, logn); + mq_NTT(h, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + + /* + * Signature is acceptable if and only if it is short enough, + * and s2 was invertible mod phi mod q. The caller must still + * check that the rebuilt public key matches the expected + * value (e.g. through a hash).
+ */ + r = ~r & (uint32_t) - PQCLEAN_FALCON1024_CLEAN_is_short(s1, s2, logn); + return (int)(r >> 31); +} + +/* see inner.h */ +int +PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) { + uint16_t *s2; + size_t u, n; + uint32_t r; + + n = (size_t)1 << logn; + s2 = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)sig[u]; + w += Q & -(w >> 31); + s2[u] = (uint16_t)w; + } + mq_NTT(s2, logn); + r = 0; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u] - 1u; + r += (w >> 31); + } + return (int)r; +} diff --git a/crypto_sign/falcon/falcon-512/META.yml b/crypto_sign/falcon/falcon-512/META.yml new file mode 100644 index 00000000..eee16206 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/META.yml @@ -0,0 +1,32 @@ +name: Falcon-512 +type: signature +claimed-nist-level: 1 +length-public-key: 897 +length-secret-key: 1281 +length-signature: 690 +nistkat-sha256: e9c3985f1ce732e29ca81aeca091f20d4dbb5beb456ee1a7ab41d04add4dab10 +testvectors-sha256: 036b5e803ab825146502513b7460b24cc9493f8e366323cd5e30e2dc6d4ca6a7 +principal-submitters: + - Thomas Prest +auxiliary-submitters: + - Pierre-Alain Fouque + - Jeffrey Hoffstein + - Paul Kirchner + - Vadim Lyubashevsky + - Thomas Pornin + - Thomas Ricosset + - Gregor Seiler + - William Whyte + - Zhenfei Zhang +implementations: + - name: clean + version: supercop-20201018 via https://github.com/jschanck/package-pqclean/tree/78831f03/falcon + - name: avx2 + version: supercop-20201018 via https://github.com/jschanck/package-pqclean/tree/78831f03/falcon + supported_platforms: + - architecture: x86_64 + operating_systems: + - Linux + - Darwin + required_flags: + - avx2 diff --git a/crypto_sign/falcon/falcon-512/avx2/LICENSE b/crypto_sign/falcon/falcon-512/avx2/LICENSE new file mode 100644 index 00000000..12c7b56c --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/LICENSE @@ -0,0 +1,24 @@ +\ +MIT License + +Copyright (c) 2017-2019 Falcon Project + +Permission 
is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/crypto_sign/falcon/falcon-512/avx2/api.h b/crypto_sign/falcon/falcon-512/avx2/api.h new file mode 100644 index 00000000..fd8f55cd --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/api.h @@ -0,0 +1,80 @@ +#ifndef PQCLEAN_FALCON512_AVX2_API_H +#define PQCLEAN_FALCON512_AVX2_API_H + +#include +#include + +#define PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES 1281 +#define PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES 897 +#define PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES 690 + +#define PQCLEAN_FALCON512_AVX2_CRYPTO_ALGNAME "Falcon-512" + +/* + * Generate a new key pair. Public key goes into pk[], private key in sk[]. + * Key sizes are exact (in bytes): + * public (pk): PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES + * private (sk): PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES + * + * Return value: 0 on success, -1 on error. 
+ */ +int PQCLEAN_FALCON512_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/* + * Compute a signature on a provided message (m, mlen), with a given + * private key (sk). Signature is written in sig[], with length written + * into *siglen. Signature length is variable; maximum signature length + * (in bytes) is PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES. + * + * sig[], m[] and sk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Verify a signature (sig, siglen) on a message (m, mlen) with a given + * public key (pk). + * + * sig[], m[] and pk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/* + * Compute a signature on a message and pack the signature and message + * into a single object, written into sm[]. The length of that output is + * written in *smlen; that length may be larger than the message length + * (mlen) by up to PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES. + * + * sm[] and m[] may overlap each other arbitrarily; however, sm[] shall + * not overlap with sk[]. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Open a signed message object (sm, smlen) and verify the signature; + * on success, the message itself is written into m[] and its length + * into *mlen. The message is shorter than the signed message object, + * but the size difference depends on the signature value; the difference + * may range up to PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES. + * + * m[], sm[] and pk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. 
+ */ +int PQCLEAN_FALCON512_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/falcon/falcon-512/avx2/codec.c b/crypto_sign/falcon/falcon-512/avx2/codec.c new file mode 100644 index 00000000..8b64ed5b --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/codec.c @@ -0,0 +1,555 @@ +#include "inner.h" + +/* + * Encoding/decoding of keys and signatures. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_modq_encode( + void *out, size_t max_out_len, + const uint16_t *x, unsigned logn) { + size_t n, out_len, u; + uint8_t *buf; + uint32_t acc; + int acc_len; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + if (x[u] >= 12289) { + return 0; + } + } + out_len = ((n * 14) + 7) >> 3; + if (out == NULL) { + return out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + for (u = 0; u < n; u ++) { + acc = (acc << 14) | x[u]; + acc_len += 14; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_modq_decode( + uint16_t *x, unsigned logn, + const void *in, size_t max_in_len) { + size_t n, in_len, u; + const uint8_t *buf; + uint32_t acc; + int acc_len; + + n = (size_t)1 << logn; + in_len = ((n * 14) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + acc = 0; + acc_len = 0; + u = 0; + while (u < n) { + acc = (acc << 8) | (*buf ++); + acc_len += 8; + if (acc_len >= 14) { + unsigned w; + + acc_len -= 14; + w = (acc >> acc_len) & 0x3FFF; + if (w >= 12289) { + return 0; + } + x[u ++] = (uint16_t)w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_trim_i16_encode( + void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits) { + size_t n, u, out_len; + int minv, maxv; + uint8_t *buf; + uint32_t acc, mask; + unsigned acc_len; + + n = (size_t)1 << logn; + maxv = (1 << (bits - 1)) - 1; + minv = -maxv; + for (u = 0; u < n; u ++) { + if (x[u] < minv || x[u] > maxv) { + return 0; + } + } + out_len = ((n * bits) + 7) >> 3; + if (out == NULL) { + return out_len; + 
} + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + mask = ((uint32_t)1 << bits) - 1; + for (u = 0; u < n; u ++) { + acc = (acc << bits) | ((uint16_t)x[u] & mask); + acc_len += bits; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf ++ = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_trim_i16_decode( + int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len) { + size_t n, in_len; + const uint8_t *buf; + size_t u; + uint32_t acc, mask1, mask2; + unsigned acc_len; + + n = (size_t)1 << logn; + in_len = ((n * bits) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + u = 0; + acc = 0; + acc_len = 0; + mask1 = ((uint32_t)1 << bits) - 1; + mask2 = (uint32_t)1 << (bits - 1); + while (u < n) { + acc = (acc << 8) | *buf ++; + acc_len += 8; + while (acc_len >= bits && u < n) { + uint32_t w; + + acc_len -= bits; + w = (acc >> acc_len) & mask1; + w |= -(w & mask2); + if (w == -mask2) { + /* + * The -2^(bits-1) value is forbidden. + */ + return 0; + } + w |= -(w & mask2); + x[u ++] = (int16_t) * (int32_t *)&w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + /* + * Extra bits in the last byte must be zero. 
+ */ + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_trim_i8_encode( + void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits) { + size_t n, u, out_len; + int minv, maxv; + uint8_t *buf; + uint32_t acc, mask; + unsigned acc_len; + + n = (size_t)1 << logn; + maxv = (1 << (bits - 1)) - 1; + minv = -maxv; + for (u = 0; u < n; u ++) { + if (x[u] < minv || x[u] > maxv) { + return 0; + } + } + out_len = ((n * bits) + 7) >> 3; + if (out == NULL) { + return out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + mask = ((uint32_t)1 << bits) - 1; + for (u = 0; u < n; u ++) { + acc = (acc << bits) | ((uint8_t)x[u] & mask); + acc_len += bits; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf ++ = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_trim_i8_decode( + int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len) { + size_t n, in_len; + const uint8_t *buf; + size_t u; + uint32_t acc, mask1, mask2; + unsigned acc_len; + + n = (size_t)1 << logn; + in_len = ((n * bits) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + u = 0; + acc = 0; + acc_len = 0; + mask1 = ((uint32_t)1 << bits) - 1; + mask2 = (uint32_t)1 << (bits - 1); + while (u < n) { + acc = (acc << 8) | *buf ++; + acc_len += 8; + while (acc_len >= bits && u < n) { + uint32_t w; + + acc_len -= bits; + w = (acc >> acc_len) & mask1; + w |= -(w & mask2); + if (w == -mask2) { + /* + * The -2^(bits-1) value is forbidden. + */ + return 0; + } + x[u ++] = (int8_t) * (int32_t *)&w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + /* + * Extra bits in the last byte must be zero. 
+ */ + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_comp_encode( + void *out, size_t max_out_len, + const int16_t *x, unsigned logn) { + uint8_t *buf; + size_t n, u, v; + uint32_t acc; + unsigned acc_len; + + n = (size_t)1 << logn; + buf = out; + + /* + * Make sure that all values are within the -2047..+2047 range. + */ + for (u = 0; u < n; u ++) { + if (x[u] < -2047 || x[u] > +2047) { + return 0; + } + } + + acc = 0; + acc_len = 0; + v = 0; + for (u = 0; u < n; u ++) { + int t; + unsigned w; + + /* + * Get sign and absolute value of next integer; push the + * sign bit. + */ + acc <<= 1; + t = x[u]; + if (t < 0) { + t = -t; + acc |= 1; + } + w = (unsigned)t; + + /* + * Push the low 7 bits of the absolute value. + */ + acc <<= 7; + acc |= w & 127u; + w >>= 7; + + /* + * We pushed exactly 8 bits. + */ + acc_len += 8; + + /* + * Push as many zeros as necessary, then a one. Since the + * absolute value is at most 2047, w can only range up to + * 15 at this point, thus we will add at most 16 bits + * here. With the 8 bits above and possibly up to 7 bits + * from previous iterations, we may go up to 31 bits, which + * will fit in the accumulator, which is an uint32_t. + */ + acc <<= (w + 1); + acc |= 1; + acc_len += w + 1; + + /* + * Produce all full bytes. + */ + while (acc_len >= 8) { + acc_len -= 8; + if (buf != NULL) { + if (v >= max_out_len) { + return 0; + } + buf[v] = (uint8_t)(acc >> acc_len); + } + v ++; + } + } + + /* + * Flush remaining bits (if any). 
+ */ + if (acc_len > 0) { + if (buf != NULL) { + if (v >= max_out_len) { + return 0; + } + buf[v] = (uint8_t)(acc << (8 - acc_len)); + } + v ++; + } + + return v; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_AVX2_comp_decode( + int16_t *x, unsigned logn, + const void *in, size_t max_in_len) { + const uint8_t *buf; + size_t n, u, v; + uint32_t acc; + unsigned acc_len; + + n = (size_t)1 << logn; + buf = in; + acc = 0; + acc_len = 0; + v = 0; + for (u = 0; u < n; u ++) { + unsigned b, s, m; + + /* + * Get next eight bits: sign and low seven bits of the + * absolute value. + */ + if (v >= max_in_len) { + return 0; + } + acc = (acc << 8) | (uint32_t)buf[v ++]; + b = acc >> acc_len; + s = b & 128; + m = b & 127; + + /* + * Get next bits until a 1 is reached. + */ + for (;;) { + if (acc_len == 0) { + if (v >= max_in_len) { + return 0; + } + acc = (acc << 8) | (uint32_t)buf[v ++]; + acc_len = 8; + } + acc_len --; + if (((acc >> acc_len) & 1) != 0) { + break; + } + m += 128; + if (m > 2047) { + return 0; + } + } + x[u] = (int16_t) m; + if (s) { + x[u] = (int16_t) - x[u]; + } + } + return v; +} + +/* + * Key elements and signatures are polynomials with small integer + * coefficients. Here are some statistics gathered over many + * generated key pairs (10000 or more for each degree): + * + * log(n) n max(f,g) std(f,g) max(F,G) std(F,G) + * 1 2 129 56.31 143 60.02 + * 2 4 123 40.93 160 46.52 + * 3 8 97 28.97 159 38.01 + * 4 16 100 21.48 154 32.50 + * 5 32 71 15.41 151 29.36 + * 6 64 59 11.07 138 27.77 + * 7 128 39 7.91 144 27.00 + * 8 256 32 5.63 148 26.61 + * 9 512 22 4.00 137 26.46 + * 10 1024 15 2.84 146 26.41 + * + * We want a compact storage format for private key, and, as part of + * key generation, we are allowed to reject some keys which would + * otherwise be fine (this does not induce any noticeable vulnerability + * as long as we reject only a small proportion of possible keys). 
+ * Hence, we enforce at key generation time maximum values for the + * elements of f, g, F and G, so that their encoding can be expressed + * in fixed-width values. Limits have been chosen so that generated + * keys are almost always within bounds, thus not impacting neither + * security or performance. + * + * IMPORTANT: the code assumes that all coefficients of f, g, F and G + * ultimately fit in the -127..+127 range. Thus, none of the elements + * of max_fg_bits[] and max_FG_bits[] shall be greater than 8. + */ + +const uint8_t PQCLEAN_FALCON512_AVX2_max_fg_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 7, + 7, + 6, + 6, + 5 +}; + +const uint8_t PQCLEAN_FALCON512_AVX2_max_FG_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8 +}; + +/* + * When generating a new key pair, we can always reject keys which + * feature an abnormally large coefficient. This can also be done for + * signatures, albeit with some care: in case the signature process is + * used in a derandomized setup (explicitly seeded with the message and + * private key), we have to follow the specification faithfully, and the + * specification only enforces a limit on the L2 norm of the signature + * vector. The limit on the L2 norm implies that the absolute value of + * a coefficient of the signature cannot be more than the following: + * + * log(n) n max sig coeff (theoretical) + * 1 2 412 + * 2 4 583 + * 3 8 824 + * 4 16 1166 + * 5 32 1649 + * 6 64 2332 + * 7 128 3299 + * 8 256 4665 + * 9 512 6598 + * 10 1024 9331 + * + * However, the largest observed signature coefficients during our + * experiments was 1077 (in absolute value), hence we can assume that, + * with overwhelming probability, signature coefficients will fit + * in -2047..2047, i.e. 12 bits. 
+ */ + +const uint8_t PQCLEAN_FALCON512_AVX2_max_sig_bits[] = { + 0, /* unused */ + 10, + 11, + 11, + 12, + 12, + 12, + 12, + 12, + 12, + 12 +}; diff --git a/crypto_sign/falcon/falcon-512/avx2/common.c b/crypto_sign/falcon/falcon-512/avx2/common.c new file mode 100644 index 00000000..7c19e7db --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/common.c @@ -0,0 +1,294 @@ +#include "inner.h" + +/* + * Support functions for signatures (hash-to-point, norm). + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_hash_to_point_vartime( + inner_shake256_context *sc, + uint16_t *x, unsigned logn) { + /* + * This is the straightforward per-the-spec implementation. It + * is not constant-time, thus it might reveal information on the + * plaintext (at least, enough to check the plaintext against a + * list of potential plaintexts) in a scenario where the + * attacker does not have access to the signature value or to + * the public key, but knows the nonce (without knowledge of the + * nonce, the hashed output cannot be matched against potential + * plaintexts). + */ + size_t n; + + n = (size_t)1 << logn; + while (n > 0) { + uint8_t buf[2]; + uint32_t w; + + inner_shake256_extract(sc, (void *)buf, sizeof buf); + w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; + if (w < 61445) { + while (w >= 12289) { + w -= 12289; + } + *x ++ = (uint16_t)w; + n --; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_hash_to_point_ct( + inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp) { + /* + * Each 16-bit sample is a value in 0..65535. The value is + * kept if it falls in 0..61444 (because 61445 = 5*12289) + * and rejected otherwise; thus, each sample has probability + * about 0.93758 of being selected. + * + * We want to oversample enough to be sure that we will + * have enough values with probability at least 1 - 2^(-256). + * Depending on degree N, this leads to the following + * required oversampling: + * + * logn n oversampling + * 1 2 65 + * 2 4 67 + * 3 8 71 + * 4 16 77 + * 5 32 86 + * 6 64 100 + * 7 128 122 + * 8 256 154 + * 9 512 205 + * 10 1024 287 + * + * If logn >= 7, then the provided temporary buffer is large + * enough. Otherwise, we use a stack buffer of 63 entries + * (i.e. 126 bytes) for the values that do not fit in tmp[]. 
+ */ + + static const uint16_t overtab[] = { + 0, /* unused */ + 65, + 67, + 71, + 77, + 86, + 100, + 122, + 154, + 205, + 287 + }; + + unsigned n, n2, u, m, p, over; + uint16_t *tt1, tt2[63]; + + /* + * We first generate m 16-bit value. Values 0..n-1 go to x[]. + * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. + * We also reduce modulo q the values; rejected values are set + * to 0xFFFF. + */ + n = 1U << logn; + n2 = n << 1; + over = overtab[logn]; + m = n + over; + tt1 = (uint16_t *)tmp; + for (u = 0; u < m; u ++) { + uint8_t buf[2]; + uint32_t w, wr; + + inner_shake256_extract(sc, buf, sizeof buf); + w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; + wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); + wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); + wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); + wr |= ((w - 61445) >> 31) - 1; + if (u < n) { + x[u] = (uint16_t)wr; + } else if (u < n2) { + tt1[u - n] = (uint16_t)wr; + } else { + tt2[u - n2] = (uint16_t)wr; + } + } + + /* + * Now we must "squeeze out" the invalid values. We do this in + * a logarithmic sequence of passes; each pass computes where a + * value should go, and moves it down by 'p' slots if necessary, + * where 'p' uses an increasing powers-of-two scale. It can be + * shown that in all cases where the loop decides that a value + * has to be moved down by p slots, the destination slot is + * "free" (i.e. contains an invalid value). + */ + for (p = 1; p <= over; p <<= 1) { + unsigned v; + + /* + * In the loop below: + * + * - v contains the index of the final destination of + * the value; it is recomputed dynamically based on + * whether values are valid or not. + * + * - u is the index of the value we consider ("source"); + * its address is s. + * + * - The loop may swap the value with the one at index + * u-p. The address of the swap destination is d. 
+ */ + v = 0; + for (u = 0; u < m; u ++) { + uint16_t *s, *d; + unsigned j, sv, dv, mk; + + if (u < n) { + s = &x[u]; + } else if (u < n2) { + s = &tt1[u - n]; + } else { + s = &tt2[u - n2]; + } + sv = *s; + + /* + * The value in sv should ultimately go to + * address v, i.e. jump back by u-v slots. + */ + j = u - v; + + /* + * We increment v for the next iteration, but + * only if the source value is valid. The mask + * 'mk' is -1 if the value is valid, 0 otherwise, + * so we _subtract_ mk. + */ + mk = (sv >> 15) - 1U; + v -= mk; + + /* + * In this loop we consider jumps by p slots; if + * u < p then there is nothing more to do. + */ + if (u < p) { + continue; + } + + /* + * Destination for the swap: value at address u-p. + */ + if ((u - p) < n) { + d = &x[u - p]; + } else if ((u - p) < n2) { + d = &tt1[(u - p) - n]; + } else { + d = &tt2[(u - p) - n2]; + } + dv = *d; + + /* + * The swap should be performed only if the source + * is valid AND the jump j has its 'p' bit set. + */ + mk &= -(((j & p) + 0x1FF) >> 9); + + *s = (uint16_t)(sv ^ (mk & (sv ^ dv))); + *d = (uint16_t)(dv ^ (mk & (sv ^ dv))); + } + } +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_is_short( + const int16_t *s1, const int16_t *s2, unsigned logn) { + /* + * We use the l2-norm. Code below uses only 32-bit operations to + * compute the square of the norm with saturation to 2^32-1 if + * the value exceeds 2^31-1. + */ + size_t n, u; + uint32_t s, ng; + + n = (size_t)1 << logn; + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = s1[u]; + s += (uint32_t)(z * z); + ng |= s; + z = s2[u]; + s += (uint32_t)(z * z); + ng |= s; + } + s |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). 
+ */ + return s < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_is_short_half( + uint32_t sqn, const int16_t *s2, unsigned logn) { + size_t n, u; + uint32_t ng; + + n = (size_t)1 << logn; + ng = -(sqn >> 31); + for (u = 0; u < n; u ++) { + int32_t z; + + z = s2[u]; + sqn += (uint32_t)(z * z); + ng |= sqn; + } + sqn |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). + */ + return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} diff --git a/crypto_sign/falcon/falcon-512/avx2/fft.c b/crypto_sign/falcon/falcon-512/avx2/fft.c new file mode 100644 index 00000000..e92af44b --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/fft.c @@ -0,0 +1,1109 @@ +#include "inner.h" + +/* + * FFT code. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* + * Rules for complex number macros: + * -------------------------------- + * + * Operand order is: destination, source1, source2... + * + * Each operand is a real and an imaginary part. + * + * All overlaps are allowed. + */ + +/* + * Addition of two complex numbers (d = a + b). + */ +#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_add(a_re, b_re); \ + fpct_im = fpr_add(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Subtraction of two complex numbers (d = a - b). + */ +#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_sub(a_re, b_re); \ + fpct_im = fpr_sub(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Multplication of two complex numbers (d = a * b). + */ +#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Squaring of a complex number (d = a * a). 
+ */ +#define FPC_SQR(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_d_re = fpr_sub(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_d_im = fpr_double(fpr_mul(fpct_a_re, fpct_a_im)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Inversion of a complex number (d = 1 / a). + */ +#define FPC_INV(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_d_re = fpr_mul(fpct_a_re, fpct_m); \ + fpct_d_im = fpr_mul(fpr_neg(fpct_a_im), fpct_m); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Division of complex numbers (d = a / b). + */ +#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_b_re), fpr_sqr(fpct_b_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_b_re = fpr_mul(fpct_b_re, fpct_m); \ + fpct_b_im = fpr_mul(fpr_neg(fpct_b_im), fpct_m); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the + * values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots + * of X^N+1 in the field of complex numbers. A crucial property is that + * w_{N-1-j} = conj(w_j) = 1/w_j for all j. + * + * FFT representation of a polynomial f (taken modulo X^N+1) is the + * set of values f(w_j). 
Since f is real, conj(f(w_j)) = f(conj(w_j)), + * thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, + * for j = 0 to N/2-1; the other half can be recomputed easily when (if) + * needed. A consequence is that FFT representation has the same size + * as normal representation: N/2 complex numbers use N real numbers (each + * complex number is the combination of a real and an imaginary part). + * + * We use a specific ordering which makes computations easier. Let rev() + * be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we + * store the real and imaginary parts of f(w_j) in slots: + * + * Re(f(w_j)) -> slot rev(j)/2 + * Im(f(w_j)) -> slot rev(j)/2+N/2 + * + * (Note that rev(j) is even for j < N/2.) + */ + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_FFT(fpr *f, unsigned logn) { + /* + * FFT algorithm in bit-reversal order uses the following + * iterative algorithm: + * + * t = N + * for m = 1; m < N; m *= 2: + * ht = t/2 + * for i1 = 0; i1 < m; i1 ++: + * j1 = i1 * t + * s = GM[m + i1] + * for j = j1; j < (j1 + ht); j ++: + * x = f[j] + * y = s * f[j + ht] + * f[j] = x + y + * f[j + ht] = x - y + * t = ht + * + * GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). + * + * In the description above, f[] is supposed to contain complex + * numbers. In our in-memory representation, the real and + * imaginary parts of f[k] are in array slots k and k+N/2. + * + * We only keep the first half of the complex numbers. We can + * see that after the first iteration, the first and second halves + * of the array of complex numbers have separate lives, so we + * simply ignore the second part. + */ + + unsigned u; + size_t t, n, hn, m; + + /* + * First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 + * (because GM[1] = w^rev(1) = w^(N/2) = i). + * In our chosen representation, this is a no-op: everything is + * already where it should be. 
+ */ + + /* + * Subsequent iterations are truncated to use only the first + * half of values. + */ + n = (size_t)1 << logn; + hn = n >> 1; + t = hn; + for (u = 1, m = 2; u < logn; u ++, m <<= 1) { + size_t ht, hm, i1, j1; + + ht = t >> 1; + hm = m >> 1; + for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { + size_t j, j2; + + j2 = j1 + ht; + if (ht >= 4) { + __m256d s_re, s_im; + + s_re = _mm256_set1_pd( + fpr_gm_tab[((m + i1) << 1) + 0].v); + s_im = _mm256_set1_pd( + fpr_gm_tab[((m + i1) << 1) + 1].v); + for (j = j1; j < j2; j += 4) { + __m256d x_re, x_im, y_re, y_im; + __m256d z_re, z_im; + + x_re = _mm256_loadu_pd(&f[j].v); + x_im = _mm256_loadu_pd(&f[j + hn].v); + z_re = _mm256_loadu_pd(&f[j + ht].v); + z_im = _mm256_loadu_pd(&f[j + ht + hn].v); + y_re = FMSUB(z_re, s_re, + _mm256_mul_pd(z_im, s_im)); + y_im = FMADD(z_re, s_im, + _mm256_mul_pd(z_im, s_re)); + _mm256_storeu_pd(&f[j].v, + _mm256_add_pd(x_re, y_re)); + _mm256_storeu_pd(&f[j + hn].v, + _mm256_add_pd(x_im, y_im)); + _mm256_storeu_pd(&f[j + ht].v, + _mm256_sub_pd(x_re, y_re)); + _mm256_storeu_pd(&f[j + ht + hn].v, + _mm256_sub_pd(x_im, y_im)); + } + } else { + fpr s_re, s_im; + + s_re = fpr_gm_tab[((m + i1) << 1) + 0]; + s_im = fpr_gm_tab[((m + i1) << 1) + 1]; + for (j = j1; j < j2; j ++) { + fpr x_re, x_im, y_re, y_im; + + x_re = f[j]; + x_im = f[j + hn]; + y_re = f[j + ht]; + y_im = f[j + ht + hn]; + FPC_MUL(y_re, y_im, + y_re, y_im, s_re, s_im); + FPC_ADD(f[j], f[j + hn], + x_re, x_im, y_re, y_im); + FPC_SUB(f[j + ht], f[j + ht + hn], + x_re, x_im, y_re, y_im); + } + } + } + t = ht; + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_iFFT(fpr *f, unsigned logn) { + /* + * Inverse FFT algorithm in bit-reversal order uses the following + * iterative algorithm: + * + * t = 1 + * for m = N; m > 1; m /= 2: + * hm = m/2 + * dt = t*2 + * for i1 = 0; i1 < hm; i1 ++: + * j1 = i1 * dt + * s = iGM[hm + i1] + * for j = j1; j < (j1 + t); j ++: + * x = f[j] + * y = f[j + t] + * f[j] = x + y + * f[j + t] = s * 
(x - y) + * t = dt + * for i1 = 0; i1 < N; i1 ++: + * f[i1] = f[i1] / N + * + * iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) + * (actually, iGM[k] = 1/GM[k] = conj(GM[k])). + * + * In the main loop (not counting the final division loop), in + * all iterations except the last, the first and second half of f[] + * (as an array of complex numbers) are separate. In our chosen + * representation, we do not keep the second half. + * + * The last iteration recombines the recomputed half with the + * implicit half, and should yield only real numbers since the + * target polynomial is real; moreover, s = i at that step. + * Thus, when considering x and y: + * y = conj(x) since the final f[j] must be real + * Therefore, f[j] is filled with 2*Re(x), and f[j + t] is + * filled with 2*Im(x). + * But we already have Re(x) and Im(x) in array slots j and j+t + * in our chosen representation. That last iteration is thus a + * simple doubling of the values in all the array. + * + * We make the last iteration a no-op by tweaking the final + * division into a division by N/2, not N. 
+ */ + size_t u, n, hn, t, m; + + n = (size_t)1 << logn; + t = 1; + m = n; + hn = n >> 1; + for (u = logn; u > 1; u --) { + size_t hm, dt, i1, j1; + + hm = m >> 1; + dt = t << 1; + for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { + size_t j, j2; + + j2 = j1 + t; + if (t >= 4) { + __m256d s_re, s_im; + + s_re = _mm256_set1_pd( + fpr_gm_tab[((hm + i1) << 1) + 0].v); + s_im = _mm256_set1_pd( + fpr_gm_tab[((hm + i1) << 1) + 1].v); + for (j = j1; j < j2; j += 4) { + __m256d x_re, x_im, y_re, y_im; + __m256d z_re, z_im; + + x_re = _mm256_loadu_pd(&f[j].v); + x_im = _mm256_loadu_pd(&f[j + hn].v); + y_re = _mm256_loadu_pd(&f[j + t].v); + y_im = _mm256_loadu_pd(&f[j + t + hn].v); + _mm256_storeu_pd(&f[j].v, + _mm256_add_pd(x_re, y_re)); + _mm256_storeu_pd(&f[j + hn].v, + _mm256_add_pd(x_im, y_im)); + x_re = _mm256_sub_pd(y_re, x_re); + x_im = _mm256_sub_pd(x_im, y_im); + z_re = FMSUB(x_im, s_im, + _mm256_mul_pd(x_re, s_re)); + z_im = FMADD(x_re, s_im, + _mm256_mul_pd(x_im, s_re)); + _mm256_storeu_pd(&f[j + t].v, z_re); + _mm256_storeu_pd(&f[j + t + hn].v, z_im); + } + } else { + fpr s_re, s_im; + + s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; + s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); + for (j = j1; j < j2; j ++) { + fpr x_re, x_im, y_re, y_im; + + x_re = f[j]; + x_im = f[j + hn]; + y_re = f[j + t]; + y_im = f[j + t + hn]; + FPC_ADD(f[j], f[j + hn], + x_re, x_im, y_re, y_im); + FPC_SUB(x_re, x_im, + x_re, x_im, y_re, y_im); + FPC_MUL(f[j + t], f[j + t + hn], + x_re, x_im, s_re, s_im); + } + } + } + t = dt; + m = hm; + } + + /* + * Last iteration is a no-op, provided that we divide by N/2 + * instead of N. We need to make a special case for logn = 0. 
+ */ + if (logn > 0) { + fpr ni; + + ni = fpr_p2_tab[logn]; + for (u = 0; u < n; u ++) { + f[u] = fpr_mul(f[u], ni); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_poly_add( + fpr *a, const fpr *b, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_add_pd( + _mm256_loadu_pd(&a[u].v), + _mm256_loadu_pd(&b[u].v))); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_add(a[u], b[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_poly_sub( + fpr *a, const fpr *b, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_sub_pd( + _mm256_loadu_pd(&a[u].v), + _mm256_loadu_pd(&b[u].v))); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_sub(a[u], b[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_poly_neg(fpr *a, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + __m256d s; + + s = _mm256_set1_pd(-0.0); + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_xor_pd(_mm256_loadu_pd(&a[u].v), s)); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_neg(a[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_poly_adj_fft(fpr *a, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 8) { + __m256d s; + + s = _mm256_set1_pd(-0.0); + for (u = (n >> 1); u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_xor_pd(_mm256_loadu_pd(&a[u].v), s)); + } + } else { + for (u = (n >> 1); u < n; u ++) { + a[u] = fpr_neg(a[u]); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_poly_mul_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = 
_mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + c_re = FMSUB( + a_re, b_re, _mm256_mul_pd(a_im, b_im)); /* presumably a_re*b_re - a_im*b_im; FMSUB is defined outside this chunk -- TODO confirm it is x*y - z */ + c_im = FMADD( + a_re, b_im, _mm256_mul_pd(a_im, b_re)); /* presumably a_re*b_im plus a_im*b_re (FMADD assumed to be x*y plus z) */ + _mm256_storeu_pd(&a[u].v, c_re); + _mm256_storeu_pd(&a[u + hn].v, c_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } + } +} + +/* see inner.h -- in-place a <- a times conj(b): the scalar path negates b's imaginary half before FPC_MUL; the AVX2 path (n >= 8) presumably folds that sign change into its FMADD/FMSUB pair */ +void +PQCLEAN_FALCON512_AVX2_poly_muladj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = _mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + c_re = FMADD( + a_re, b_re, _mm256_mul_pd(a_im, b_im)); + c_im = FMSUB( + a_im, b_re, _mm256_mul_pd(a_re, b_im)); + _mm256_storeu_pd(&a[u].v, c_re); + _mm256_storeu_pd(&a[u + hn].v, c_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = fpr_neg(b[u + hn]); /* conjugate of b */ + FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } + } +} + +/* see inner.h -- in place, replaces each entry by the sum of the squares of its real and imaginary parts: the result goes to a[u], and a[u + hn] is set to zero */ +void +PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(fpr *a, unsigned logn) { + /* + * Since each coefficient is multiplied with its own conjugate, + * the result contains only real values.
+ */ + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d zero; + + zero = _mm256_setzero_pd(); + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + _mm256_storeu_pd(&a[u].v, + FMADD(a_re, a_re, + _mm256_mul_pd(a_im, a_im))); + _mm256_storeu_pd(&a[u + hn].v, zero); /* imaginary half is cleared */ + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im; + + a_re = a[u]; + a_im = a[u + hn]; + a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); + a[u + hn] = fpr_zero; + } + } +} + +/* see inner.h -- multiplies all n = 2^logn entries of a by the scalar x, in place; x is broadcast to four lanes when n >= 4, scalar fpr_mul otherwise */ +void +PQCLEAN_FALCON512_AVX2_poly_mulconst(fpr *a, fpr x, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + if (n >= 4) { + __m256d x4; + + x4 = _mm256_set1_pd(x.v); + for (u = 0; u < n; u += 4) { + _mm256_storeu_pd(&a[u].v, + _mm256_mul_pd(x4, _mm256_loadu_pd(&a[u].v))); + } + } else { + for (u = 0; u < n; u ++) { + a[u] = fpr_mul(a[u], x); + } + } +} + +/* see inner.h -- in-place complex division a <- a / b over the hn = n/2 (real, imaginary) pairs; the AVX2 path first scales b by the reciprocal of its squared modulus and then multiplies, the scalar path uses FPC_DIV */ +void +PQCLEAN_FALCON512_AVX2_poly_div_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im, t; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = _mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + t = _mm256_div_pd(one, + FMADD(b_re, b_re, + _mm256_mul_pd(b_im, b_im))); /* presumably 1 / |b|^2 per lane (FMADD is defined outside this chunk) */ + b_re = _mm256_mul_pd(b_re, t); + b_im = _mm256_mul_pd(b_im, t); + c_re = FMADD( + a_re, b_re, _mm256_mul_pd(a_im, b_im)); + c_im = FMSUB( + a_im, b_re, _mm256_mul_pd(a_re, b_im)); + _mm256_storeu_pd(&a[u].v, c_re); + _mm256_storeu_pd(&a[u + hn].v, c_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } + } +} + +/* see inner.h -- writes d[u] = 1 / (|a_u|^2 plus |b_u|^2) for u < n/2 only; the upper half of d is never stored by this function */ +void
+PQCLEAN_FALCON512_AVX2_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, b_re, b_im, dv; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + b_re = _mm256_loadu_pd(&b[u].v); + b_im = _mm256_loadu_pd(&b[u + hn].v); + dv = _mm256_div_pd(one, + _mm256_add_pd( + FMADD(a_re, a_re, + _mm256_mul_pd(a_im, a_im)), + FMADD(b_re, b_re, + _mm256_mul_pd(b_im, b_im)))); + _mm256_storeu_pd(&d[u].v, dv); /* only d[0 .. hn-1] is written; there is no store to d[u + hn] */ + } + } else { + for (u = 0; u < hn; u ++) { + fpr a_re, a_im; + fpr b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + d[u] = fpr_inv(fpr_add( + fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), + fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); + } + } +} + +/* see inner.h -- d <- F times conj(f) plus G times conj(g), entry by entry over the n/2 (real, imaginary) pairs; the scalar path does this with FPC_MUL on negated f_im / g_im followed by fpr_add on both halves */ +void +PQCLEAN_FALCON512_AVX2_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d F_re, F_im, G_re, G_im; + __m256d f_re, f_im, g_re, g_im; + __m256d a_re, a_im, b_re, b_im; + + F_re = _mm256_loadu_pd(&F[u].v); + F_im = _mm256_loadu_pd(&F[u + hn].v); + G_re = _mm256_loadu_pd(&G[u].v); + G_im = _mm256_loadu_pd(&G[u + hn].v); + f_re = _mm256_loadu_pd(&f[u].v); + f_im = _mm256_loadu_pd(&f[u + hn].v); + g_re = _mm256_loadu_pd(&g[u].v); + g_im = _mm256_loadu_pd(&g[u + hn].v); + + a_re = FMADD(F_re, f_re, + _mm256_mul_pd(F_im, f_im)); + a_im = FMSUB(F_im, f_re, + _mm256_mul_pd(F_re, f_im)); + b_re = FMADD(G_re, g_re, + _mm256_mul_pd(G_im, g_im)); + b_im = FMSUB(G_im, g_re, + _mm256_mul_pd(G_re, g_im)); + _mm256_storeu_pd(&d[u].v, + _mm256_add_pd(a_re, b_re)); + _mm256_storeu_pd(&d[u + hn].v, + _mm256_add_pd(a_im, b_im)); + } + } else { + for (u = 0; u < hn; u ++) { + fpr F_re, F_im, G_re, G_im; + fpr
f_re, f_im, g_re, g_im; + fpr a_re, a_im, b_re, b_im; + + F_re = F[u]; + F_im = F[u + hn]; + G_re = G[u]; + G_im = G[u + hn]; + f_re = f[u]; + f_im = f[u + hn]; + g_re = g[u]; + g_im = g[u + hn]; + + FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); + FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); + d[u] = fpr_add(a_re, b_re); + d[u + hn] = fpr_add(a_im, b_im); + } + } +} + +/* see inner.h -- scales both a[u] and a[u + hn] by the single value b[u] for u < n/2, in place; b[u + hn] is never read, i.e. b is used as a purely real factor */ +void +PQCLEAN_FALCON512_AVX2_poly_mul_autoadj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + for (u = 0; u < hn; u += 4) { + __m256d a_re, a_im, bv; + + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + bv = _mm256_loadu_pd(&b[u].v); + _mm256_storeu_pd(&a[u].v, + _mm256_mul_pd(a_re, bv)); + _mm256_storeu_pd(&a[u + hn].v, + _mm256_mul_pd(a_im, bv)); + } + } else { + for (u = 0; u < hn; u ++) { + a[u] = fpr_mul(a[u], b[u]); + a[u + hn] = fpr_mul(a[u + hn], b[u]); + } + } +} + +/* see inner.h -- divides both a[u] and a[u + hn] by b[u] for u < n/2, in place, using one reciprocal of b[u] followed by two multiplications; b[u + hn] is never read */ +void +PQCLEAN_FALCON512_AVX2_poly_div_autoadj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d ib, a_re, a_im; + + ib = _mm256_div_pd(one, _mm256_loadu_pd(&b[u].v)); + a_re = _mm256_loadu_pd(&a[u].v); + a_im = _mm256_loadu_pd(&a[u + hn].v); + _mm256_storeu_pd(&a[u].v, _mm256_mul_pd(a_re, ib)); + _mm256_storeu_pd(&a[u + hn].v, _mm256_mul_pd(a_im, ib)); + } + } else { + for (u = 0; u < hn; u ++) { + fpr ib; + + ib = fpr_inv(b[u]); + a[u] = fpr_mul(a[u], ib); + a[u + hn] = fpr_mul(a[u + hn], ib); + } + } +} + +/* see inner.h -- works in place on g01 and g11 (g00 is read-only). Per the scalar path, and assuming FPC_DIV / FPC_MUL / FPC_SUB are complex divide / multiply / subtract: with mu = g01 / g00, g11 receives g11 - mu times conj(g01) and g01 receives conj(mu) */ +void +PQCLEAN_FALCON512_AVX2_poly_LDL_fft( + const fpr *g00, + fpr *g01, fpr *g11, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d g00_re, g00_im, g01_re,
g01_im, g11_re, g11_im; + __m256d t, mu_re, mu_im, xi_re, xi_im; + + g00_re = _mm256_loadu_pd(&g00[u].v); + g00_im = _mm256_loadu_pd(&g00[u + hn].v); + g01_re = _mm256_loadu_pd(&g01[u].v); + g01_im = _mm256_loadu_pd(&g01[u + hn].v); + g11_re = _mm256_loadu_pd(&g11[u].v); + g11_im = _mm256_loadu_pd(&g11[u + hn].v); + + t = _mm256_div_pd(one, + FMADD(g00_re, g00_re, + _mm256_mul_pd(g00_im, g00_im))); + g00_re = _mm256_mul_pd(g00_re, t); + g00_im = _mm256_mul_pd(g00_im, t); + mu_re = FMADD(g01_re, g00_re, + _mm256_mul_pd(g01_im, g00_im)); + mu_im = FMSUB(g01_re, g00_im, + _mm256_mul_pd(g01_im, g00_re)); /* this value is stored without negation below, whereas the scalar branch stores fpr_neg(mu_im) */ + xi_re = FMSUB(mu_re, g01_re, + _mm256_mul_pd(mu_im, g01_im)); + xi_im = FMADD(mu_im, g01_re, + _mm256_mul_pd(mu_re, g01_im)); + _mm256_storeu_pd(&g11[u].v, + _mm256_sub_pd(g11_re, xi_re)); + _mm256_storeu_pd(&g11[u + hn].v, + _mm256_add_pd(g11_im, xi_im)); + _mm256_storeu_pd(&g01[u].v, mu_re); + _mm256_storeu_pd(&g01[u + hn].v, mu_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + fpr mu_re, mu_im; + + g00_re = g00[u]; + g00_im = g00[u + hn]; + g01_re = g01[u]; + g01_im = g01[u + hn]; + g11_re = g11[u]; + g11_im = g11[u + hn]; + FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); + FPC_MUL(g01_re, g01_im, + mu_re, mu_im, g01_re, fpr_neg(g01_im)); /* local g01_re / g01_im are reused as scratch for the product */ + FPC_SUB(g11[u], g11[u + hn], + g11_re, g11_im, g01_re, g01_im); + g01[u] = mu_re; + g01[u + hn] = fpr_neg(mu_im); + } + } +} + +/* see inner.h -- same per-entry computation as poly_LDL_fft in this file, except that g01 and g11 are read-only here and the two results are written to d11 and l10 instead */ +void +PQCLEAN_FALCON512_AVX2_poly_LDLmv_fft( + fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + if (n >= 8) { + __m256d one; + + one = _mm256_set1_pd(1.0); + for (u = 0; u < hn; u += 4) { + __m256d g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + __m256d t, mu_re, mu_im, xi_re, xi_im; + + g00_re = _mm256_loadu_pd(&g00[u].v); + g00_im = _mm256_loadu_pd(&g00[u + hn].v); + g01_re = _mm256_loadu_pd(&g01[u].v); + g01_im =
_mm256_loadu_pd(&g01[u + hn].v); + g11_re = _mm256_loadu_pd(&g11[u].v); + g11_im = _mm256_loadu_pd(&g11[u + hn].v); + + t = _mm256_div_pd(one, + FMADD(g00_re, g00_re, + _mm256_mul_pd(g00_im, g00_im))); + g00_re = _mm256_mul_pd(g00_re, t); + g00_im = _mm256_mul_pd(g00_im, t); + mu_re = FMADD(g01_re, g00_re, + _mm256_mul_pd(g01_im, g00_im)); + mu_im = FMSUB(g01_re, g00_im, + _mm256_mul_pd(g01_im, g00_re)); /* stored as-is into l10 below, whereas the scalar branch stores fpr_neg(mu_im) */ + xi_re = FMSUB(mu_re, g01_re, + _mm256_mul_pd(mu_im, g01_im)); + xi_im = FMADD(mu_im, g01_re, + _mm256_mul_pd(mu_re, g01_im)); + _mm256_storeu_pd(&d11[u].v, + _mm256_sub_pd(g11_re, xi_re)); + _mm256_storeu_pd(&d11[u + hn].v, + _mm256_add_pd(g11_im, xi_im)); + _mm256_storeu_pd(&l10[u].v, mu_re); + _mm256_storeu_pd(&l10[u + hn].v, mu_im); + } + } else { + for (u = 0; u < hn; u ++) { + fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + fpr mu_re, mu_im; + + g00_re = g00[u]; + g00_im = g00[u + hn]; + g01_re = g01[u]; + g01_im = g01[u + hn]; + g11_re = g11[u]; + g11_im = g11[u + hn]; + FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); + FPC_MUL(g01_re, g01_im, + mu_re, mu_im, g01_re, fpr_neg(g01_im)); + FPC_SUB(d11[u], d11[u + hn], + g11_re, g11_im, g01_re, g01_im); + l10[u] = mu_re; + l10[u + hn] = fpr_neg(mu_im); + } + } +} + +/* see inner.h -- splits f (n = 2^logn slots) into the two half-size outputs f0 and f1; each output entry u has its real part at index u and its imaginary part at index u + qn, with qn = n/4 */ +void +PQCLEAN_FALCON512_AVX2_poly_split_fft( + fpr *f0, fpr *f1, + const fpr *f, unsigned logn) { + /* + * The FFT representation we use is in bit-reversed order + * (element i contains f(w^(rev(i))), where rev() is the + * bit-reversal function over the ring degree). This changes + * indexes with regard to the Falcon specification.
+ */ + size_t n, hn, qn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + qn = hn >> 1; + + if (n >= 8) { + __m256d half, sv; + + half = _mm256_set1_pd(0.5); + sv = _mm256_set_pd(-0.0, 0.0, -0.0, 0.0); /* sign mask for lanes 3 and 1 only (set_pd lists lanes from high to low) */ + for (u = 0; u < qn; u += 2) { + __m256d ab_re, ab_im, ff0, ff1, ff2, ff3, gmt; + + ab_re = _mm256_loadu_pd(&f[(u << 1)].v); + ab_im = _mm256_loadu_pd(&f[(u << 1) + hn].v); + ff0 = _mm256_mul_pd(_mm256_hadd_pd(ab_re, ab_im), half); + ff0 = _mm256_permute4x64_pd(ff0, 0xD8); /* 0xD8 reorders lanes to 0,2,1,3 so that each 128-bit half can be stored on its own */ + _mm_storeu_pd(&f0[u].v, + _mm256_extractf128_pd(ff0, 0)); + _mm_storeu_pd(&f0[u + qn].v, + _mm256_extractf128_pd(ff0, 1)); + + ff1 = _mm256_mul_pd(_mm256_hsub_pd(ab_re, ab_im), half); + gmt = _mm256_loadu_pd(&fpr_gm_tab[(u + hn) << 1].v); /* four consecutive table values: the (even, odd) pairs for u and u + 1 */ + ff2 = _mm256_shuffle_pd(ff1, ff1, 0x5); + ff3 = _mm256_hadd_pd( + _mm256_mul_pd(ff1, gmt), + _mm256_xor_pd(_mm256_mul_pd(ff2, gmt), sv)); + ff3 = _mm256_permute4x64_pd(ff3, 0xD8); + _mm_storeu_pd(&f1[u].v, + _mm256_extractf128_pd(ff3, 0)); + _mm_storeu_pd(&f1[u + qn].v, + _mm256_extractf128_pd(ff3, 1)); + } + } else { + f0[0] = f[0]; /* these two stores are the whole result when qn == 0; otherwise the loop below overwrites them */ + f1[0] = f[hn]; + + for (u = 0; u < qn; u ++) { + fpr a_re, a_im, b_re, b_im; + fpr t_re, t_im; + + a_re = f[(u << 1) + 0]; + a_im = f[(u << 1) + 0 + hn]; + b_re = f[(u << 1) + 1]; + b_im = f[(u << 1) + 1 + hn]; + + FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); + f0[u] = fpr_half(t_re); + f0[u + qn] = fpr_half(t_im); + + FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); + FPC_MUL(t_re, t_im, t_re, t_im, + fpr_gm_tab[((u + hn) << 1) + 0], + fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); /* multiplies by the table entry with its second (odd-index) component negated */ + f1[u] = fpr_half(t_re); + f1[u + qn] = fpr_half(t_im); + } + } +} + +/* see inner.h -- builds f (n = 2^logn slots) from the half-size inputs f0 and f1: per u < qn, with b = f1_u times the fpr_gm_tab entry for u + hn, slot 2u receives f0_u plus b and slot 2u + 1 receives f0_u minus b (imaginary parts hn slots further) */ +void +PQCLEAN_FALCON512_AVX2_poly_merge_fft( + fpr *f, + const fpr *f0, const fpr *f1, unsigned logn) { + size_t n, hn, qn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + qn = hn >> 1; + + if (n >= 16) { + for (u = 0; u < qn; u += 4) { + __m256d a_re, a_im, b_re, b_im, c_re, c_im; + __m256d gm1, gm2, g_re, g_im; + __m256d t_re, t_im, u_re, u_im; + __m256d tu1_re, tu2_re,
tu1_im, tu2_im; + + a_re = _mm256_loadu_pd(&f0[u].v); + a_im = _mm256_loadu_pd(&f0[u + qn].v); + c_re = _mm256_loadu_pd(&f1[u].v); + c_im = _mm256_loadu_pd(&f1[u + qn].v); + + gm1 = _mm256_loadu_pd(&fpr_gm_tab[(u + hn) << 1].v); + gm2 = _mm256_loadu_pd(&fpr_gm_tab[(u + 2 + hn) << 1].v); + g_re = _mm256_unpacklo_pd(gm1, gm2); + g_im = _mm256_unpackhi_pd(gm1, gm2); + g_re = _mm256_permute4x64_pd(g_re, 0xD8); + g_im = _mm256_permute4x64_pd(g_im, 0xD8); /* g_re / g_im now hold the even-index / odd-index table values for entries u .. u+3, in order */ + + b_re = FMSUB( + c_re, g_re, _mm256_mul_pd(c_im, g_im)); + b_im = FMADD( + c_re, g_im, _mm256_mul_pd(c_im, g_re)); + + t_re = _mm256_add_pd(a_re, b_re); + t_im = _mm256_add_pd(a_im, b_im); + u_re = _mm256_sub_pd(a_re, b_re); + u_im = _mm256_sub_pd(a_im, b_im); + + tu1_re = _mm256_unpacklo_pd(t_re, u_re); + tu2_re = _mm256_unpackhi_pd(t_re, u_re); + tu1_im = _mm256_unpacklo_pd(t_im, u_im); + tu2_im = _mm256_unpackhi_pd(t_im, u_im); /* the unpack + permute2f128 sequence interleaves sums and differences: t goes to even slots, u to odd slots, as in the scalar branch */ + _mm256_storeu_pd(&f[(u << 1)].v, + _mm256_permute2f128_pd(tu1_re, tu2_re, 0x20)); + _mm256_storeu_pd(&f[(u << 1) + 4].v, + _mm256_permute2f128_pd(tu1_re, tu2_re, 0x31)); + _mm256_storeu_pd(&f[(u << 1) + hn].v, + _mm256_permute2f128_pd(tu1_im, tu2_im, 0x20)); + _mm256_storeu_pd(&f[(u << 1) + 4 + hn].v, + _mm256_permute2f128_pd(tu1_im, tu2_im, 0x31)); + } + } else { + f[0] = f0[0]; /* these two stores are the whole result when qn == 0; otherwise the loop below overwrites them */ + f[hn] = f1[0]; + + for (u = 0; u < qn; u ++) { + fpr a_re, a_im, b_re, b_im; + fpr t_re, t_im; + + a_re = f0[u]; + a_im = f0[u + qn]; + FPC_MUL(b_re, b_im, f1[u], f1[u + qn], + fpr_gm_tab[((u + hn) << 1) + 0], + fpr_gm_tab[((u + hn) << 1) + 1]); + FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); + f[(u << 1) + 0] = t_re; + f[(u << 1) + 0 + hn] = t_im; + FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); + f[(u << 1) + 1] = t_re; + f[(u << 1) + 1 + hn] = t_im; + } + } +} diff --git a/crypto_sign/falcon/falcon-512/avx2/fpr.c b/crypto_sign/falcon/falcon-512/avx2/fpr.c new file mode 100644 index 00000000..2f04a35d --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/fpr.c @@ -0,0 +1,1078 @@ +#include "inner.h" + +/* + *
Floating-point operations. + * + * This file implements the non-inline functions declared in + * fpr.h, as well as the constants for FFT / iFFT. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + + +const fpr fpr_gm_tab[] = { + {0}, {0}, /* unused */ + {-0.000000000000000000000000000}, { 1.000000000000000000000000000}, + { 0.707106781186547524400844362}, { 0.707106781186547524400844362}, + {-0.707106781186547524400844362}, { 0.707106781186547524400844362}, + { 0.923879532511286756128183189}, { 0.382683432365089771728459984}, + {-0.382683432365089771728459984}, { 0.923879532511286756128183189}, + { 0.382683432365089771728459984}, { 0.923879532511286756128183189}, + {-0.923879532511286756128183189}, { 0.382683432365089771728459984}, + { 0.980785280403230449126182236}, { 0.195090322016128267848284868}, + {-0.195090322016128267848284868}, { 0.980785280403230449126182236}, + { 0.555570233019602224742830814}, { 0.831469612302545237078788378}, + {-0.831469612302545237078788378}, { 0.555570233019602224742830814}, + { 0.831469612302545237078788378}, { 0.555570233019602224742830814}, + {-0.555570233019602224742830814}, { 0.831469612302545237078788378}, + { 0.195090322016128267848284868}, { 0.980785280403230449126182236}, + {-0.980785280403230449126182236}, { 0.195090322016128267848284868}, + { 0.995184726672196886244836953}, { 0.098017140329560601994195564}, + {-0.098017140329560601994195564}, { 0.995184726672196886244836953}, + { 0.634393284163645498215171613}, { 0.773010453362736960810906610}, + {-0.773010453362736960810906610}, { 0.634393284163645498215171613}, + { 0.881921264348355029712756864}, { 0.471396736825997648556387626}, + {-0.471396736825997648556387626}, { 0.881921264348355029712756864}, + { 0.290284677254462367636192376}, { 0.956940335732208864935797887}, + {-0.956940335732208864935797887}, { 0.290284677254462367636192376}, + { 0.956940335732208864935797887}, { 0.290284677254462367636192376}, + {-0.290284677254462367636192376}, { 0.956940335732208864935797887}, + { 0.471396736825997648556387626}, { 0.881921264348355029712756864}, + 
{-0.881921264348355029712756864}, { 0.471396736825997648556387626}, + { 0.773010453362736960810906610}, { 0.634393284163645498215171613}, + {-0.634393284163645498215171613}, { 0.773010453362736960810906610}, + { 0.098017140329560601994195564}, { 0.995184726672196886244836953}, + {-0.995184726672196886244836953}, { 0.098017140329560601994195564}, + { 0.998795456205172392714771605}, { 0.049067674327418014254954977}, + {-0.049067674327418014254954977}, { 0.998795456205172392714771605}, + { 0.671558954847018400625376850}, { 0.740951125354959091175616897}, + {-0.740951125354959091175616897}, { 0.671558954847018400625376850}, + { 0.903989293123443331586200297}, { 0.427555093430282094320966857}, + {-0.427555093430282094320966857}, { 0.903989293123443331586200297}, + { 0.336889853392220050689253213}, { 0.941544065183020778412509403}, + {-0.941544065183020778412509403}, { 0.336889853392220050689253213}, + { 0.970031253194543992603984207}, { 0.242980179903263889948274162}, + {-0.242980179903263889948274162}, { 0.970031253194543992603984207}, + { 0.514102744193221726593693839}, { 0.857728610000272069902269984}, + {-0.857728610000272069902269984}, { 0.514102744193221726593693839}, + { 0.803207531480644909806676513}, { 0.595699304492433343467036529}, + {-0.595699304492433343467036529}, { 0.803207531480644909806676513}, + { 0.146730474455361751658850130}, { 0.989176509964780973451673738}, + {-0.989176509964780973451673738}, { 0.146730474455361751658850130}, + { 0.989176509964780973451673738}, { 0.146730474455361751658850130}, + {-0.146730474455361751658850130}, { 0.989176509964780973451673738}, + { 0.595699304492433343467036529}, { 0.803207531480644909806676513}, + {-0.803207531480644909806676513}, { 0.595699304492433343467036529}, + { 0.857728610000272069902269984}, { 0.514102744193221726593693839}, + {-0.514102744193221726593693839}, { 0.857728610000272069902269984}, + { 0.242980179903263889948274162}, { 0.970031253194543992603984207}, + {-0.970031253194543992603984207}, { 
0.242980179903263889948274162}, + { 0.941544065183020778412509403}, { 0.336889853392220050689253213}, + {-0.336889853392220050689253213}, { 0.941544065183020778412509403}, + { 0.427555093430282094320966857}, { 0.903989293123443331586200297}, + {-0.903989293123443331586200297}, { 0.427555093430282094320966857}, + { 0.740951125354959091175616897}, { 0.671558954847018400625376850}, + {-0.671558954847018400625376850}, { 0.740951125354959091175616897}, + { 0.049067674327418014254954977}, { 0.998795456205172392714771605}, + {-0.998795456205172392714771605}, { 0.049067674327418014254954977}, + { 0.999698818696204220115765650}, { 0.024541228522912288031734529}, + {-0.024541228522912288031734529}, { 0.999698818696204220115765650}, + { 0.689540544737066924616730630}, { 0.724247082951466920941069243}, + {-0.724247082951466920941069243}, { 0.689540544737066924616730630}, + { 0.914209755703530654635014829}, { 0.405241314004989870908481306}, + {-0.405241314004989870908481306}, { 0.914209755703530654635014829}, + { 0.359895036534988148775104572}, { 0.932992798834738887711660256}, + {-0.932992798834738887711660256}, { 0.359895036534988148775104572}, + { 0.975702130038528544460395766}, { 0.219101240156869797227737547}, + {-0.219101240156869797227737547}, { 0.975702130038528544460395766}, + { 0.534997619887097210663076905}, { 0.844853565249707073259571205}, + {-0.844853565249707073259571205}, { 0.534997619887097210663076905}, + { 0.817584813151583696504920884}, { 0.575808191417845300745972454}, + {-0.575808191417845300745972454}, { 0.817584813151583696504920884}, + { 0.170961888760301226363642357}, { 0.985277642388941244774018433}, + {-0.985277642388941244774018433}, { 0.170961888760301226363642357}, + { 0.992479534598709998156767252}, { 0.122410675199216198498704474}, + {-0.122410675199216198498704474}, { 0.992479534598709998156767252}, + { 0.615231590580626845484913563}, { 0.788346427626606262009164705}, + {-0.788346427626606262009164705}, { 0.615231590580626845484913563}, + { 
0.870086991108711418652292404}, { 0.492898192229784036873026689}, + {-0.492898192229784036873026689}, { 0.870086991108711418652292404}, + { 0.266712757474898386325286515}, { 0.963776065795439866686464356}, + {-0.963776065795439866686464356}, { 0.266712757474898386325286515}, + { 0.949528180593036667195936074}, { 0.313681740398891476656478846}, + {-0.313681740398891476656478846}, { 0.949528180593036667195936074}, + { 0.449611329654606600046294579}, { 0.893224301195515320342416447}, + {-0.893224301195515320342416447}, { 0.449611329654606600046294579}, + { 0.757208846506484547575464054}, { 0.653172842953776764084203014}, + {-0.653172842953776764084203014}, { 0.757208846506484547575464054}, + { 0.073564563599667423529465622}, { 0.997290456678690216135597140}, + {-0.997290456678690216135597140}, { 0.073564563599667423529465622}, + { 0.997290456678690216135597140}, { 0.073564563599667423529465622}, + {-0.073564563599667423529465622}, { 0.997290456678690216135597140}, + { 0.653172842953776764084203014}, { 0.757208846506484547575464054}, + {-0.757208846506484547575464054}, { 0.653172842953776764084203014}, + { 0.893224301195515320342416447}, { 0.449611329654606600046294579}, + {-0.449611329654606600046294579}, { 0.893224301195515320342416447}, + { 0.313681740398891476656478846}, { 0.949528180593036667195936074}, + {-0.949528180593036667195936074}, { 0.313681740398891476656478846}, + { 0.963776065795439866686464356}, { 0.266712757474898386325286515}, + {-0.266712757474898386325286515}, { 0.963776065795439866686464356}, + { 0.492898192229784036873026689}, { 0.870086991108711418652292404}, + {-0.870086991108711418652292404}, { 0.492898192229784036873026689}, + { 0.788346427626606262009164705}, { 0.615231590580626845484913563}, + {-0.615231590580626845484913563}, { 0.788346427626606262009164705}, + { 0.122410675199216198498704474}, { 0.992479534598709998156767252}, + {-0.992479534598709998156767252}, { 0.122410675199216198498704474}, + { 0.985277642388941244774018433}, { 
0.170961888760301226363642357}, + {-0.170961888760301226363642357}, { 0.985277642388941244774018433}, + { 0.575808191417845300745972454}, { 0.817584813151583696504920884}, + {-0.817584813151583696504920884}, { 0.575808191417845300745972454}, + { 0.844853565249707073259571205}, { 0.534997619887097210663076905}, + {-0.534997619887097210663076905}, { 0.844853565249707073259571205}, + { 0.219101240156869797227737547}, { 0.975702130038528544460395766}, + {-0.975702130038528544460395766}, { 0.219101240156869797227737547}, + { 0.932992798834738887711660256}, { 0.359895036534988148775104572}, + {-0.359895036534988148775104572}, { 0.932992798834738887711660256}, + { 0.405241314004989870908481306}, { 0.914209755703530654635014829}, + {-0.914209755703530654635014829}, { 0.405241314004989870908481306}, + { 0.724247082951466920941069243}, { 0.689540544737066924616730630}, + {-0.689540544737066924616730630}, { 0.724247082951466920941069243}, + { 0.024541228522912288031734529}, { 0.999698818696204220115765650}, + {-0.999698818696204220115765650}, { 0.024541228522912288031734529}, + { 0.999924701839144540921646491}, { 0.012271538285719926079408262}, + {-0.012271538285719926079408262}, { 0.999924701839144540921646491}, + { 0.698376249408972853554813503}, { 0.715730825283818654125532623}, + {-0.715730825283818654125532623}, { 0.698376249408972853554813503}, + { 0.919113851690057743908477789}, { 0.393992040061048108596188661}, + {-0.393992040061048108596188661}, { 0.919113851690057743908477789}, + { 0.371317193951837543411934967}, { 0.928506080473215565937167396}, + {-0.928506080473215565937167396}, { 0.371317193951837543411934967}, + { 0.978317370719627633106240097}, { 0.207111376192218549708116020}, + {-0.207111376192218549708116020}, { 0.978317370719627633106240097}, + { 0.545324988422046422313987347}, { 0.838224705554838043186996856}, + {-0.838224705554838043186996856}, { 0.545324988422046422313987347}, + { 0.824589302785025264474803737}, { 0.565731810783613197389765011}, + 
{-0.565731810783613197389765011}, { 0.824589302785025264474803737}, + { 0.183039887955140958516532578}, { 0.983105487431216327180301155}, + {-0.983105487431216327180301155}, { 0.183039887955140958516532578}, + { 0.993906970002356041546922813}, { 0.110222207293883058807899140}, + {-0.110222207293883058807899140}, { 0.993906970002356041546922813}, + { 0.624859488142386377084072816}, { 0.780737228572094478301588484}, + {-0.780737228572094478301588484}, { 0.624859488142386377084072816}, + { 0.876070094195406607095844268}, { 0.482183772079122748517344481}, + {-0.482183772079122748517344481}, { 0.876070094195406607095844268}, + { 0.278519689385053105207848526}, { 0.960430519415565811199035138}, + {-0.960430519415565811199035138}, { 0.278519689385053105207848526}, + { 0.953306040354193836916740383}, { 0.302005949319228067003463232}, + {-0.302005949319228067003463232}, { 0.953306040354193836916740383}, + { 0.460538710958240023633181487}, { 0.887639620402853947760181617}, + {-0.887639620402853947760181617}, { 0.460538710958240023633181487}, + { 0.765167265622458925888815999}, { 0.643831542889791465068086063}, + {-0.643831542889791465068086063}, { 0.765167265622458925888815999}, + { 0.085797312344439890461556332}, { 0.996312612182778012627226190}, + {-0.996312612182778012627226190}, { 0.085797312344439890461556332}, + { 0.998118112900149207125155861}, { 0.061320736302208577782614593}, + {-0.061320736302208577782614593}, { 0.998118112900149207125155861}, + { 0.662415777590171761113069817}, { 0.749136394523459325469203257}, + {-0.749136394523459325469203257}, { 0.662415777590171761113069817}, + { 0.898674465693953843041976744}, { 0.438616238538527637647025738}, + {-0.438616238538527637647025738}, { 0.898674465693953843041976744}, + { 0.325310292162262934135954708}, { 0.945607325380521325730945387}, + {-0.945607325380521325730945387}, { 0.325310292162262934135954708}, + { 0.966976471044852109087220226}, { 0.254865659604514571553980779}, + {-0.254865659604514571553980779}, { 
0.966976471044852109087220226}, + { 0.503538383725717558691867071}, { 0.863972856121586737918147054}, + {-0.863972856121586737918147054}, { 0.503538383725717558691867071}, + { 0.795836904608883536262791915}, { 0.605511041404325513920626941}, + {-0.605511041404325513920626941}, { 0.795836904608883536262791915}, + { 0.134580708507126186316358409}, { 0.990902635427780025108237011}, + {-0.990902635427780025108237011}, { 0.134580708507126186316358409}, + { 0.987301418157858382399815802}, { 0.158858143333861441684385360}, + {-0.158858143333861441684385360}, { 0.987301418157858382399815802}, + { 0.585797857456438860328080838}, { 0.810457198252594791726703434}, + {-0.810457198252594791726703434}, { 0.585797857456438860328080838}, + { 0.851355193105265142261290312}, { 0.524589682678468906215098464}, + {-0.524589682678468906215098464}, { 0.851355193105265142261290312}, + { 0.231058108280671119643236018}, { 0.972939952205560145467720114}, + {-0.972939952205560145467720114}, { 0.231058108280671119643236018}, + { 0.937339011912574923201899593}, { 0.348418680249434568419308588}, + {-0.348418680249434568419308588}, { 0.937339011912574923201899593}, + { 0.416429560097637182562598911}, { 0.909167983090522376563884788}, + {-0.909167983090522376563884788}, { 0.416429560097637182562598911}, + { 0.732654271672412834615546649}, { 0.680600997795453050594430464}, + {-0.680600997795453050594430464}, { 0.732654271672412834615546649}, + { 0.036807222941358832324332691}, { 0.999322384588349500896221011}, + {-0.999322384588349500896221011}, { 0.036807222941358832324332691}, + { 0.999322384588349500896221011}, { 0.036807222941358832324332691}, + {-0.036807222941358832324332691}, { 0.999322384588349500896221011}, + { 0.680600997795453050594430464}, { 0.732654271672412834615546649}, + {-0.732654271672412834615546649}, { 0.680600997795453050594430464}, + { 0.909167983090522376563884788}, { 0.416429560097637182562598911}, + {-0.416429560097637182562598911}, { 0.909167983090522376563884788}, + { 
0.348418680249434568419308588}, { 0.937339011912574923201899593}, + {-0.937339011912574923201899593}, { 0.348418680249434568419308588}, + { 0.972939952205560145467720114}, { 0.231058108280671119643236018}, + {-0.231058108280671119643236018}, { 0.972939952205560145467720114}, + { 0.524589682678468906215098464}, { 0.851355193105265142261290312}, + {-0.851355193105265142261290312}, { 0.524589682678468906215098464}, + { 0.810457198252594791726703434}, { 0.585797857456438860328080838}, + {-0.585797857456438860328080838}, { 0.810457198252594791726703434}, + { 0.158858143333861441684385360}, { 0.987301418157858382399815802}, + {-0.987301418157858382399815802}, { 0.158858143333861441684385360}, + { 0.990902635427780025108237011}, { 0.134580708507126186316358409}, + {-0.134580708507126186316358409}, { 0.990902635427780025108237011}, + { 0.605511041404325513920626941}, { 0.795836904608883536262791915}, + {-0.795836904608883536262791915}, { 0.605511041404325513920626941}, + { 0.863972856121586737918147054}, { 0.503538383725717558691867071}, + {-0.503538383725717558691867071}, { 0.863972856121586737918147054}, + { 0.254865659604514571553980779}, { 0.966976471044852109087220226}, + {-0.966976471044852109087220226}, { 0.254865659604514571553980779}, + { 0.945607325380521325730945387}, { 0.325310292162262934135954708}, + {-0.325310292162262934135954708}, { 0.945607325380521325730945387}, + { 0.438616238538527637647025738}, { 0.898674465693953843041976744}, + {-0.898674465693953843041976744}, { 0.438616238538527637647025738}, + { 0.749136394523459325469203257}, { 0.662415777590171761113069817}, + {-0.662415777590171761113069817}, { 0.749136394523459325469203257}, + { 0.061320736302208577782614593}, { 0.998118112900149207125155861}, + {-0.998118112900149207125155861}, { 0.061320736302208577782614593}, + { 0.996312612182778012627226190}, { 0.085797312344439890461556332}, + {-0.085797312344439890461556332}, { 0.996312612182778012627226190}, + { 0.643831542889791465068086063}, { 
0.765167265622458925888815999}, + {-0.765167265622458925888815999}, { 0.643831542889791465068086063}, + { 0.887639620402853947760181617}, { 0.460538710958240023633181487}, + {-0.460538710958240023633181487}, { 0.887639620402853947760181617}, + { 0.302005949319228067003463232}, { 0.953306040354193836916740383}, + {-0.953306040354193836916740383}, { 0.302005949319228067003463232}, + { 0.960430519415565811199035138}, { 0.278519689385053105207848526}, + {-0.278519689385053105207848526}, { 0.960430519415565811199035138}, + { 0.482183772079122748517344481}, { 0.876070094195406607095844268}, + {-0.876070094195406607095844268}, { 0.482183772079122748517344481}, + { 0.780737228572094478301588484}, { 0.624859488142386377084072816}, + {-0.624859488142386377084072816}, { 0.780737228572094478301588484}, + { 0.110222207293883058807899140}, { 0.993906970002356041546922813}, + {-0.993906970002356041546922813}, { 0.110222207293883058807899140}, + { 0.983105487431216327180301155}, { 0.183039887955140958516532578}, + {-0.183039887955140958516532578}, { 0.983105487431216327180301155}, + { 0.565731810783613197389765011}, { 0.824589302785025264474803737}, + {-0.824589302785025264474803737}, { 0.565731810783613197389765011}, + { 0.838224705554838043186996856}, { 0.545324988422046422313987347}, + {-0.545324988422046422313987347}, { 0.838224705554838043186996856}, + { 0.207111376192218549708116020}, { 0.978317370719627633106240097}, + {-0.978317370719627633106240097}, { 0.207111376192218549708116020}, + { 0.928506080473215565937167396}, { 0.371317193951837543411934967}, + {-0.371317193951837543411934967}, { 0.928506080473215565937167396}, + { 0.393992040061048108596188661}, { 0.919113851690057743908477789}, + {-0.919113851690057743908477789}, { 0.393992040061048108596188661}, + { 0.715730825283818654125532623}, { 0.698376249408972853554813503}, + {-0.698376249408972853554813503}, { 0.715730825283818654125532623}, + { 0.012271538285719926079408262}, { 0.999924701839144540921646491}, + 
{-0.999924701839144540921646491}, { 0.012271538285719926079408262}, + { 0.999981175282601142656990438}, { 0.006135884649154475359640235}, + {-0.006135884649154475359640235}, { 0.999981175282601142656990438}, + { 0.702754744457225302452914421}, { 0.711432195745216441522130290}, + {-0.711432195745216441522130290}, { 0.702754744457225302452914421}, + { 0.921514039342041943465396332}, { 0.388345046698826291624993541}, + {-0.388345046698826291624993541}, { 0.921514039342041943465396332}, + { 0.377007410216418256726567823}, { 0.926210242138311341974793388}, + {-0.926210242138311341974793388}, { 0.377007410216418256726567823}, + { 0.979569765685440534439326110}, { 0.201104634842091911558443546}, + {-0.201104634842091911558443546}, { 0.979569765685440534439326110}, + { 0.550457972936604802977289893}, { 0.834862874986380056304401383}, + {-0.834862874986380056304401383}, { 0.550457972936604802977289893}, + { 0.828045045257755752067527592}, { 0.560661576197336023839710223}, + {-0.560661576197336023839710223}, { 0.828045045257755752067527592}, + { 0.189068664149806212754997837}, { 0.981963869109555264072848154}, + {-0.981963869109555264072848154}, { 0.189068664149806212754997837}, + { 0.994564570734255452119106243}, { 0.104121633872054579120943880}, + {-0.104121633872054579120943880}, { 0.994564570734255452119106243}, + { 0.629638238914927025372981341}, { 0.776888465673232450040827983}, + {-0.776888465673232450040827983}, { 0.629638238914927025372981341}, + { 0.879012226428633477831323711}, { 0.476799230063322133342158117}, + {-0.476799230063322133342158117}, { 0.879012226428633477831323711}, + { 0.284407537211271843618310615}, { 0.958703474895871555374645792}, + {-0.958703474895871555374645792}, { 0.284407537211271843618310615}, + { 0.955141168305770721498157712}, { 0.296150888243623824121786128}, + {-0.296150888243623824121786128}, { 0.955141168305770721498157712}, + { 0.465976495767966177902756065}, { 0.884797098430937780104007041}, + {-0.884797098430937780104007041}, { 
0.465976495767966177902756065}, + { 0.769103337645579639346626069}, { 0.639124444863775743801488193}, + {-0.639124444863775743801488193}, { 0.769103337645579639346626069}, + { 0.091908956497132728624990979}, { 0.995767414467659793982495643}, + {-0.995767414467659793982495643}, { 0.091908956497132728624990979}, + { 0.998475580573294752208559038}, { 0.055195244349689939809447526}, + {-0.055195244349689939809447526}, { 0.998475580573294752208559038}, + { 0.666999922303637506650154222}, { 0.745057785441465962407907310}, + {-0.745057785441465962407907310}, { 0.666999922303637506650154222}, + { 0.901348847046022014570746093}, { 0.433093818853151968484222638}, + {-0.433093818853151968484222638}, { 0.901348847046022014570746093}, + { 0.331106305759876401737190737}, { 0.943593458161960361495301445}, + {-0.943593458161960361495301445}, { 0.331106305759876401737190737}, + { 0.968522094274417316221088329}, { 0.248927605745720168110682816}, + {-0.248927605745720168110682816}, { 0.968522094274417316221088329}, + { 0.508830142543107036931749324}, { 0.860866938637767279344583877}, + {-0.860866938637767279344583877}, { 0.508830142543107036931749324}, + { 0.799537269107905033500246232}, { 0.600616479383868926653875896}, + {-0.600616479383868926653875896}, { 0.799537269107905033500246232}, + { 0.140658239332849230714788846}, { 0.990058210262297105505906464}, + {-0.990058210262297105505906464}, { 0.140658239332849230714788846}, + { 0.988257567730749491404792538}, { 0.152797185258443427720336613}, + {-0.152797185258443427720336613}, { 0.988257567730749491404792538}, + { 0.590759701858874228423887908}, { 0.806847553543799272206514313}, + {-0.806847553543799272206514313}, { 0.590759701858874228423887908}, + { 0.854557988365400520767862276}, { 0.519355990165589587361829932}, + {-0.519355990165589587361829932}, { 0.854557988365400520767862276}, + { 0.237023605994367206867735915}, { 0.971503890986251775537099622}, + {-0.971503890986251775537099622}, { 0.237023605994367206867735915}, + { 
0.939459223602189911962669246}, { 0.342660717311994397592781983}, + {-0.342660717311994397592781983}, { 0.939459223602189911962669246}, + { 0.422000270799799685941287941}, { 0.906595704514915365332960588}, + {-0.906595704514915365332960588}, { 0.422000270799799685941287941}, + { 0.736816568877369875090132520}, { 0.676092703575315960360419228}, + {-0.676092703575315960360419228}, { 0.736816568877369875090132520}, + { 0.042938256934940823077124540}, { 0.999077727752645382888781997}, + {-0.999077727752645382888781997}, { 0.042938256934940823077124540}, + { 0.999529417501093163079703322}, { 0.030674803176636625934021028}, + {-0.030674803176636625934021028}, { 0.999529417501093163079703322}, + { 0.685083667772700381362052545}, { 0.728464390448225196492035438}, + {-0.728464390448225196492035438}, { 0.685083667772700381362052545}, + { 0.911706032005429851404397325}, { 0.410843171057903942183466675}, + {-0.410843171057903942183466675}, { 0.911706032005429851404397325}, + { 0.354163525420490382357395796}, { 0.935183509938947577642207480}, + {-0.935183509938947577642207480}, { 0.354163525420490382357395796}, + { 0.974339382785575860518721668}, { 0.225083911359792835991642120}, + {-0.225083911359792835991642120}, { 0.974339382785575860518721668}, + { 0.529803624686294668216054671}, { 0.848120344803297251279133563}, + {-0.848120344803297251279133563}, { 0.529803624686294668216054671}, + { 0.814036329705948361654516690}, { 0.580813958095764545075595272}, + {-0.580813958095764545075595272}, { 0.814036329705948361654516690}, + { 0.164913120489969921418189113}, { 0.986308097244598647863297524}, + {-0.986308097244598647863297524}, { 0.164913120489969921418189113}, + { 0.991709753669099522860049931}, { 0.128498110793793172624415589}, + {-0.128498110793793172624415589}, { 0.991709753669099522860049931}, + { 0.610382806276309452716352152}, { 0.792106577300212351782342879}, + {-0.792106577300212351782342879}, { 0.610382806276309452716352152}, + { 0.867046245515692651480195629}, { 
0.498227666972781852410983869}, + {-0.498227666972781852410983869}, { 0.867046245515692651480195629}, + { 0.260794117915275518280186509}, { 0.965394441697689374550843858}, + {-0.965394441697689374550843858}, { 0.260794117915275518280186509}, + { 0.947585591017741134653387321}, { 0.319502030816015677901518272}, + {-0.319502030816015677901518272}, { 0.947585591017741134653387321}, + { 0.444122144570429231642069418}, { 0.895966249756185155914560282}, + {-0.895966249756185155914560282}, { 0.444122144570429231642069418}, + { 0.753186799043612482483430486}, { 0.657806693297078656931182264}, + {-0.657806693297078656931182264}, { 0.753186799043612482483430486}, + { 0.067443919563664057897972422}, { 0.997723066644191609848546728}, + {-0.997723066644191609848546728}, { 0.067443919563664057897972422}, + { 0.996820299291165714972629398}, { 0.079682437971430121147120656}, + {-0.079682437971430121147120656}, { 0.996820299291165714972629398}, + { 0.648514401022112445084560551}, { 0.761202385484261814029709836}, + {-0.761202385484261814029709836}, { 0.648514401022112445084560551}, + { 0.890448723244757889952150560}, { 0.455083587126343823535869268}, + {-0.455083587126343823535869268}, { 0.890448723244757889952150560}, + { 0.307849640041534893682063646}, { 0.951435020969008369549175569}, + {-0.951435020969008369549175569}, { 0.307849640041534893682063646}, + { 0.962121404269041595429604316}, { 0.272621355449948984493347477}, + {-0.272621355449948984493347477}, { 0.962121404269041595429604316}, + { 0.487550160148435954641485027}, { 0.873094978418290098636085973}, + {-0.873094978418290098636085973}, { 0.487550160148435954641485027}, + { 0.784556597155575233023892575}, { 0.620057211763289178646268191}, + {-0.620057211763289178646268191}, { 0.784556597155575233023892575}, + { 0.116318630911904767252544319}, { 0.993211949234794533104601012}, + {-0.993211949234794533104601012}, { 0.116318630911904767252544319}, + { 0.984210092386929073193874387}, { 0.177004220412148756196839844}, + 
{-0.177004220412148756196839844}, { 0.984210092386929073193874387}, + { 0.570780745886967280232652864}, { 0.821102514991104679060430820}, + {-0.821102514991104679060430820}, { 0.570780745886967280232652864}, + { 0.841554977436898409603499520}, { 0.540171472729892881297845480}, + {-0.540171472729892881297845480}, { 0.841554977436898409603499520}, + { 0.213110319916091373967757518}, { 0.977028142657754351485866211}, + {-0.977028142657754351485866211}, { 0.213110319916091373967757518}, + { 0.930766961078983731944872340}, { 0.365612997804773870011745909}, + {-0.365612997804773870011745909}, { 0.930766961078983731944872340}, + { 0.399624199845646828544117031}, { 0.916679059921042663116457013}, + {-0.916679059921042663116457013}, { 0.399624199845646828544117031}, + { 0.720002507961381629076682999}, { 0.693971460889654009003734389}, + {-0.693971460889654009003734389}, { 0.720002507961381629076682999}, + { 0.018406729905804820927366313}, { 0.999830581795823422015722275}, + {-0.999830581795823422015722275}, { 0.018406729905804820927366313}, + { 0.999830581795823422015722275}, { 0.018406729905804820927366313}, + {-0.018406729905804820927366313}, { 0.999830581795823422015722275}, + { 0.693971460889654009003734389}, { 0.720002507961381629076682999}, + {-0.720002507961381629076682999}, { 0.693971460889654009003734389}, + { 0.916679059921042663116457013}, { 0.399624199845646828544117031}, + {-0.399624199845646828544117031}, { 0.916679059921042663116457013}, + { 0.365612997804773870011745909}, { 0.930766961078983731944872340}, + {-0.930766961078983731944872340}, { 0.365612997804773870011745909}, + { 0.977028142657754351485866211}, { 0.213110319916091373967757518}, + {-0.213110319916091373967757518}, { 0.977028142657754351485866211}, + { 0.540171472729892881297845480}, { 0.841554977436898409603499520}, + {-0.841554977436898409603499520}, { 0.540171472729892881297845480}, + { 0.821102514991104679060430820}, { 0.570780745886967280232652864}, + {-0.570780745886967280232652864}, { 
0.821102514991104679060430820}, + { 0.177004220412148756196839844}, { 0.984210092386929073193874387}, + {-0.984210092386929073193874387}, { 0.177004220412148756196839844}, + { 0.993211949234794533104601012}, { 0.116318630911904767252544319}, + {-0.116318630911904767252544319}, { 0.993211949234794533104601012}, + { 0.620057211763289178646268191}, { 0.784556597155575233023892575}, + {-0.784556597155575233023892575}, { 0.620057211763289178646268191}, + { 0.873094978418290098636085973}, { 0.487550160148435954641485027}, + {-0.487550160148435954641485027}, { 0.873094978418290098636085973}, + { 0.272621355449948984493347477}, { 0.962121404269041595429604316}, + {-0.962121404269041595429604316}, { 0.272621355449948984493347477}, + { 0.951435020969008369549175569}, { 0.307849640041534893682063646}, + {-0.307849640041534893682063646}, { 0.951435020969008369549175569}, + { 0.455083587126343823535869268}, { 0.890448723244757889952150560}, + {-0.890448723244757889952150560}, { 0.455083587126343823535869268}, + { 0.761202385484261814029709836}, { 0.648514401022112445084560551}, + {-0.648514401022112445084560551}, { 0.761202385484261814029709836}, + { 0.079682437971430121147120656}, { 0.996820299291165714972629398}, + {-0.996820299291165714972629398}, { 0.079682437971430121147120656}, + { 0.997723066644191609848546728}, { 0.067443919563664057897972422}, + {-0.067443919563664057897972422}, { 0.997723066644191609848546728}, + { 0.657806693297078656931182264}, { 0.753186799043612482483430486}, + {-0.753186799043612482483430486}, { 0.657806693297078656931182264}, + { 0.895966249756185155914560282}, { 0.444122144570429231642069418}, + {-0.444122144570429231642069418}, { 0.895966249756185155914560282}, + { 0.319502030816015677901518272}, { 0.947585591017741134653387321}, + {-0.947585591017741134653387321}, { 0.319502030816015677901518272}, + { 0.965394441697689374550843858}, { 0.260794117915275518280186509}, + {-0.260794117915275518280186509}, { 0.965394441697689374550843858}, + { 
0.498227666972781852410983869}, { 0.867046245515692651480195629}, + {-0.867046245515692651480195629}, { 0.498227666972781852410983869}, + { 0.792106577300212351782342879}, { 0.610382806276309452716352152}, + {-0.610382806276309452716352152}, { 0.792106577300212351782342879}, + { 0.128498110793793172624415589}, { 0.991709753669099522860049931}, + {-0.991709753669099522860049931}, { 0.128498110793793172624415589}, + { 0.986308097244598647863297524}, { 0.164913120489969921418189113}, + {-0.164913120489969921418189113}, { 0.986308097244598647863297524}, + { 0.580813958095764545075595272}, { 0.814036329705948361654516690}, + {-0.814036329705948361654516690}, { 0.580813958095764545075595272}, + { 0.848120344803297251279133563}, { 0.529803624686294668216054671}, + {-0.529803624686294668216054671}, { 0.848120344803297251279133563}, + { 0.225083911359792835991642120}, { 0.974339382785575860518721668}, + {-0.974339382785575860518721668}, { 0.225083911359792835991642120}, + { 0.935183509938947577642207480}, { 0.354163525420490382357395796}, + {-0.354163525420490382357395796}, { 0.935183509938947577642207480}, + { 0.410843171057903942183466675}, { 0.911706032005429851404397325}, + {-0.911706032005429851404397325}, { 0.410843171057903942183466675}, + { 0.728464390448225196492035438}, { 0.685083667772700381362052545}, + {-0.685083667772700381362052545}, { 0.728464390448225196492035438}, + { 0.030674803176636625934021028}, { 0.999529417501093163079703322}, + {-0.999529417501093163079703322}, { 0.030674803176636625934021028}, + { 0.999077727752645382888781997}, { 0.042938256934940823077124540}, + {-0.042938256934940823077124540}, { 0.999077727752645382888781997}, + { 0.676092703575315960360419228}, { 0.736816568877369875090132520}, + {-0.736816568877369875090132520}, { 0.676092703575315960360419228}, + { 0.906595704514915365332960588}, { 0.422000270799799685941287941}, + {-0.422000270799799685941287941}, { 0.906595704514915365332960588}, + { 0.342660717311994397592781983}, { 
0.939459223602189911962669246}, + {-0.939459223602189911962669246}, { 0.342660717311994397592781983}, + { 0.971503890986251775537099622}, { 0.237023605994367206867735915}, + {-0.237023605994367206867735915}, { 0.971503890986251775537099622}, + { 0.519355990165589587361829932}, { 0.854557988365400520767862276}, + {-0.854557988365400520767862276}, { 0.519355990165589587361829932}, + { 0.806847553543799272206514313}, { 0.590759701858874228423887908}, + {-0.590759701858874228423887908}, { 0.806847553543799272206514313}, + { 0.152797185258443427720336613}, { 0.988257567730749491404792538}, + {-0.988257567730749491404792538}, { 0.152797185258443427720336613}, + { 0.990058210262297105505906464}, { 0.140658239332849230714788846}, + {-0.140658239332849230714788846}, { 0.990058210262297105505906464}, + { 0.600616479383868926653875896}, { 0.799537269107905033500246232}, + {-0.799537269107905033500246232}, { 0.600616479383868926653875896}, + { 0.860866938637767279344583877}, { 0.508830142543107036931749324}, + {-0.508830142543107036931749324}, { 0.860866938637767279344583877}, + { 0.248927605745720168110682816}, { 0.968522094274417316221088329}, + {-0.968522094274417316221088329}, { 0.248927605745720168110682816}, + { 0.943593458161960361495301445}, { 0.331106305759876401737190737}, + {-0.331106305759876401737190737}, { 0.943593458161960361495301445}, + { 0.433093818853151968484222638}, { 0.901348847046022014570746093}, + {-0.901348847046022014570746093}, { 0.433093818853151968484222638}, + { 0.745057785441465962407907310}, { 0.666999922303637506650154222}, + {-0.666999922303637506650154222}, { 0.745057785441465962407907310}, + { 0.055195244349689939809447526}, { 0.998475580573294752208559038}, + {-0.998475580573294752208559038}, { 0.055195244349689939809447526}, + { 0.995767414467659793982495643}, { 0.091908956497132728624990979}, + {-0.091908956497132728624990979}, { 0.995767414467659793982495643}, + { 0.639124444863775743801488193}, { 0.769103337645579639346626069}, + 
{-0.769103337645579639346626069}, { 0.639124444863775743801488193}, + { 0.884797098430937780104007041}, { 0.465976495767966177902756065}, + {-0.465976495767966177902756065}, { 0.884797098430937780104007041}, + { 0.296150888243623824121786128}, { 0.955141168305770721498157712}, + {-0.955141168305770721498157712}, { 0.296150888243623824121786128}, + { 0.958703474895871555374645792}, { 0.284407537211271843618310615}, + {-0.284407537211271843618310615}, { 0.958703474895871555374645792}, + { 0.476799230063322133342158117}, { 0.879012226428633477831323711}, + {-0.879012226428633477831323711}, { 0.476799230063322133342158117}, + { 0.776888465673232450040827983}, { 0.629638238914927025372981341}, + {-0.629638238914927025372981341}, { 0.776888465673232450040827983}, + { 0.104121633872054579120943880}, { 0.994564570734255452119106243}, + {-0.994564570734255452119106243}, { 0.104121633872054579120943880}, + { 0.981963869109555264072848154}, { 0.189068664149806212754997837}, + {-0.189068664149806212754997837}, { 0.981963869109555264072848154}, + { 0.560661576197336023839710223}, { 0.828045045257755752067527592}, + {-0.828045045257755752067527592}, { 0.560661576197336023839710223}, + { 0.834862874986380056304401383}, { 0.550457972936604802977289893}, + {-0.550457972936604802977289893}, { 0.834862874986380056304401383}, + { 0.201104634842091911558443546}, { 0.979569765685440534439326110}, + {-0.979569765685440534439326110}, { 0.201104634842091911558443546}, + { 0.926210242138311341974793388}, { 0.377007410216418256726567823}, + {-0.377007410216418256726567823}, { 0.926210242138311341974793388}, + { 0.388345046698826291624993541}, { 0.921514039342041943465396332}, + {-0.921514039342041943465396332}, { 0.388345046698826291624993541}, + { 0.711432195745216441522130290}, { 0.702754744457225302452914421}, + {-0.702754744457225302452914421}, { 0.711432195745216441522130290}, + { 0.006135884649154475359640235}, { 0.999981175282601142656990438}, + {-0.999981175282601142656990438}, { 
0.006135884649154475359640235}, + { 0.999995293809576171511580126}, { 0.003067956762965976270145365}, + {-0.003067956762965976270145365}, { 0.999995293809576171511580126}, + { 0.704934080375904908852523758}, { 0.709272826438865651316533772}, + {-0.709272826438865651316533772}, { 0.704934080375904908852523758}, + { 0.922701128333878570437264227}, { 0.385516053843918864075607949}, + {-0.385516053843918864075607949}, { 0.922701128333878570437264227}, + { 0.379847208924051170576281147}, { 0.925049240782677590302371869}, + {-0.925049240782677590302371869}, { 0.379847208924051170576281147}, + { 0.980182135968117392690210009}, { 0.198098410717953586179324918}, + {-0.198098410717953586179324918}, { 0.980182135968117392690210009}, + { 0.553016705580027531764226988}, { 0.833170164701913186439915922}, + {-0.833170164701913186439915922}, { 0.553016705580027531764226988}, + { 0.829761233794523042469023765}, { 0.558118531220556115693702964}, + {-0.558118531220556115693702964}, { 0.829761233794523042469023765}, + { 0.192080397049892441679288205}, { 0.981379193313754574318224190}, + {-0.981379193313754574318224190}, { 0.192080397049892441679288205}, + { 0.994879330794805620591166107}, { 0.101069862754827824987887585}, + {-0.101069862754827824987887585}, { 0.994879330794805620591166107}, + { 0.632018735939809021909403706}, { 0.774953106594873878359129282}, + {-0.774953106594873878359129282}, { 0.632018735939809021909403706}, + { 0.880470889052160770806542929}, { 0.474100214650550014398580015}, + {-0.474100214650550014398580015}, { 0.880470889052160770806542929}, + { 0.287347459544729526477331841}, { 0.957826413027532890321037029}, + {-0.957826413027532890321037029}, { 0.287347459544729526477331841}, + { 0.956045251349996443270479823}, { 0.293219162694258650606608599}, + {-0.293219162694258650606608599}, { 0.956045251349996443270479823}, + { 0.468688822035827933697617870}, { 0.883363338665731594736308015}, + {-0.883363338665731594736308015}, { 0.468688822035827933697617870}, + { 
0.771060524261813773200605759}, { 0.636761861236284230413943435}, + {-0.636761861236284230413943435}, { 0.771060524261813773200605759}, + { 0.094963495329638998938034312}, { 0.995480755491926941769171600}, + {-0.995480755491926941769171600}, { 0.094963495329638998938034312}, + { 0.998640218180265222418199049}, { 0.052131704680283321236358216}, + {-0.052131704680283321236358216}, { 0.998640218180265222418199049}, + { 0.669282588346636065720696366}, { 0.743007952135121693517362293}, + {-0.743007952135121693517362293}, { 0.669282588346636065720696366}, + { 0.902673318237258806751502391}, { 0.430326481340082633908199031}, + {-0.430326481340082633908199031}, { 0.902673318237258806751502391}, + { 0.333999651442009404650865481}, { 0.942573197601446879280758735}, + {-0.942573197601446879280758735}, { 0.333999651442009404650865481}, + { 0.969281235356548486048290738}, { 0.245955050335794611599924709}, + {-0.245955050335794611599924709}, { 0.969281235356548486048290738}, + { 0.511468850437970399504391001}, { 0.859301818357008404783582139}, + {-0.859301818357008404783582139}, { 0.511468850437970399504391001}, + { 0.801376171723140219430247777}, { 0.598160706996342311724958652}, + {-0.598160706996342311724958652}, { 0.801376171723140219430247777}, + { 0.143695033150294454819773349}, { 0.989622017463200834623694454}, + {-0.989622017463200834623694454}, { 0.143695033150294454819773349}, + { 0.988721691960323767604516485}, { 0.149764534677321517229695737}, + {-0.149764534677321517229695737}, { 0.988721691960323767604516485}, + { 0.593232295039799808047809426}, { 0.805031331142963597922659282}, + {-0.805031331142963597922659282}, { 0.593232295039799808047809426}, + { 0.856147328375194481019630732}, { 0.516731799017649881508753876}, + {-0.516731799017649881508753876}, { 0.856147328375194481019630732}, + { 0.240003022448741486568922365}, { 0.970772140728950302138169611}, + {-0.970772140728950302138169611}, { 0.240003022448741486568922365}, + { 0.940506070593268323787291309}, { 
0.339776884406826857828825803}, + {-0.339776884406826857828825803}, { 0.940506070593268323787291309}, + { 0.424779681209108833357226189}, { 0.905296759318118774354048329}, + {-0.905296759318118774354048329}, { 0.424779681209108833357226189}, + { 0.738887324460615147933116508}, { 0.673829000378756060917568372}, + {-0.673829000378756060917568372}, { 0.738887324460615147933116508}, + { 0.046003182130914628814301788}, { 0.998941293186856850633930266}, + {-0.998941293186856850633930266}, { 0.046003182130914628814301788}, + { 0.999618822495178597116830637}, { 0.027608145778965741612354872}, + {-0.027608145778965741612354872}, { 0.999618822495178597116830637}, + { 0.687315340891759108199186948}, { 0.726359155084345976817494315}, + {-0.726359155084345976817494315}, { 0.687315340891759108199186948}, + { 0.912962190428398164628018233}, { 0.408044162864978680820747499}, + {-0.408044162864978680820747499}, { 0.912962190428398164628018233}, + { 0.357030961233430032614954036}, { 0.934092550404258914729877883}, + {-0.934092550404258914729877883}, { 0.357030961233430032614954036}, + { 0.975025345066994146844913468}, { 0.222093620973203534094094721}, + {-0.222093620973203534094094721}, { 0.975025345066994146844913468}, + { 0.532403127877197971442805218}, { 0.846490938774052078300544488}, + {-0.846490938774052078300544488}, { 0.532403127877197971442805218}, + { 0.815814410806733789010772660}, { 0.578313796411655563342245019}, + {-0.578313796411655563342245019}, { 0.815814410806733789010772660}, + { 0.167938294974731178054745536}, { 0.985797509167567424700995000}, + {-0.985797509167567424700995000}, { 0.167938294974731178054745536}, + { 0.992099313142191757112085445}, { 0.125454983411546238542336453}, + {-0.125454983411546238542336453}, { 0.992099313142191757112085445}, + { 0.612810082429409703935211936}, { 0.790230221437310055030217152}, + {-0.790230221437310055030217152}, { 0.612810082429409703935211936}, + { 0.868570705971340895340449876}, { 0.495565261825772531150266670}, + 
{-0.495565261825772531150266670}, { 0.868570705971340895340449876}, + { 0.263754678974831383611349322}, { 0.964589793289812723836432159}, + {-0.964589793289812723836432159}, { 0.263754678974831383611349322}, + { 0.948561349915730288158494826}, { 0.316593375556165867243047035}, + {-0.316593375556165867243047035}, { 0.948561349915730288158494826}, + { 0.446868840162374195353044389}, { 0.894599485631382678433072126}, + {-0.894599485631382678433072126}, { 0.446868840162374195353044389}, + { 0.755201376896536527598710756}, { 0.655492852999615385312679701}, + {-0.655492852999615385312679701}, { 0.755201376896536527598710756}, + { 0.070504573389613863027351471}, { 0.997511456140303459699448390}, + {-0.997511456140303459699448390}, { 0.070504573389613863027351471}, + { 0.997060070339482978987989949}, { 0.076623861392031492278332463}, + {-0.076623861392031492278332463}, { 0.997060070339482978987989949}, + { 0.650846684996380915068975573}, { 0.759209188978388033485525443}, + {-0.759209188978388033485525443}, { 0.650846684996380915068975573}, + { 0.891840709392342727796478697}, { 0.452349587233770874133026703}, + {-0.452349587233770874133026703}, { 0.891840709392342727796478697}, + { 0.310767152749611495835997250}, { 0.950486073949481721759926101}, + {-0.950486073949481721759926101}, { 0.310767152749611495835997250}, + { 0.962953266873683886347921481}, { 0.269668325572915106525464462}, + {-0.269668325572915106525464462}, { 0.962953266873683886347921481}, + { 0.490226483288291154229598449}, { 0.871595086655951034842481435}, + {-0.871595086655951034842481435}, { 0.490226483288291154229598449}, + { 0.786455213599085757522319464}, { 0.617647307937803932403979402}, + {-0.617647307937803932403979402}, { 0.786455213599085757522319464}, + { 0.119365214810991364593637790}, { 0.992850414459865090793563344}, + {-0.992850414459865090793563344}, { 0.119365214810991364593637790}, + { 0.984748501801904218556553176}, { 0.173983873387463827950700807}, + {-0.173983873387463827950700807}, { 
0.984748501801904218556553176}, + { 0.573297166698042212820171239}, { 0.819347520076796960824689637}, + {-0.819347520076796960824689637}, { 0.573297166698042212820171239}, + { 0.843208239641845437161743865}, { 0.537587076295645482502214932}, + {-0.537587076295645482502214932}, { 0.843208239641845437161743865}, + { 0.216106797076219509948385131}, { 0.976369731330021149312732194}, + {-0.976369731330021149312732194}, { 0.216106797076219509948385131}, + { 0.931884265581668106718557199}, { 0.362755724367397216204854462}, + {-0.362755724367397216204854462}, { 0.931884265581668106718557199}, + { 0.402434650859418441082533934}, { 0.915448716088267819566431292}, + {-0.915448716088267819566431292}, { 0.402434650859418441082533934}, + { 0.722128193929215321243607198}, { 0.691759258364157774906734132}, + {-0.691759258364157774906734132}, { 0.722128193929215321243607198}, + { 0.021474080275469507418374898}, { 0.999769405351215321657617036}, + {-0.999769405351215321657617036}, { 0.021474080275469507418374898}, + { 0.999882347454212525633049627}, { 0.015339206284988101044151868}, + {-0.015339206284988101044151868}, { 0.999882347454212525633049627}, + { 0.696177131491462944788582591}, { 0.717870045055731736211325329}, + {-0.717870045055731736211325329}, { 0.696177131491462944788582591}, + { 0.917900775621390457642276297}, { 0.396809987416710328595290911}, + {-0.396809987416710328595290911}, { 0.917900775621390457642276297}, + { 0.368466829953372331712746222}, { 0.929640895843181265457918066}, + {-0.929640895843181265457918066}, { 0.368466829953372331712746222}, + { 0.977677357824509979943404762}, { 0.210111836880469621717489972}, + {-0.210111836880469621717489972}, { 0.977677357824509979943404762}, + { 0.542750784864515906586768661}, { 0.839893794195999504583383987}, + {-0.839893794195999504583383987}, { 0.542750784864515906586768661}, + { 0.822849781375826332046780034}, { 0.568258952670131549790548489}, + {-0.568258952670131549790548489}, { 0.822849781375826332046780034}, + { 
0.180022901405699522679906590}, { 0.983662419211730274396237776}, + {-0.983662419211730274396237776}, { 0.180022901405699522679906590}, + { 0.993564135520595333782021697}, { 0.113270952177564349018228733}, + {-0.113270952177564349018228733}, { 0.993564135520595333782021697}, + { 0.622461279374149972519166721}, { 0.782650596166575738458949301}, + {-0.782650596166575738458949301}, { 0.622461279374149972519166721}, + { 0.874586652278176112634431897}, { 0.484869248000791101822951699}, + {-0.484869248000791101822951699}, { 0.874586652278176112634431897}, + { 0.275571819310958163076425168}, { 0.961280485811320641748659653}, + {-0.961280485811320641748659653}, { 0.275571819310958163076425168}, + { 0.952375012719765858529893608}, { 0.304929229735402406490728633}, + {-0.304929229735402406490728633}, { 0.952375012719765858529893608}, + { 0.457813303598877221904961155}, { 0.889048355854664562540777729}, + {-0.889048355854664562540777729}, { 0.457813303598877221904961155}, + { 0.763188417263381271704838297}, { 0.646176012983316364832802220}, + {-0.646176012983316364832802220}, { 0.763188417263381271704838297}, + { 0.082740264549375693111987083}, { 0.996571145790554847093566910}, + {-0.996571145790554847093566910}, { 0.082740264549375693111987083}, + { 0.997925286198596012623025462}, { 0.064382630929857460819324537}, + {-0.064382630929857460819324537}, { 0.997925286198596012623025462}, + { 0.660114342067420478559490747}, { 0.751165131909686411205819422}, + {-0.751165131909686411205819422}, { 0.660114342067420478559490747}, + { 0.897324580705418281231391836}, { 0.441371268731716692879988968}, + {-0.441371268731716692879988968}, { 0.897324580705418281231391836}, + { 0.322407678801069848384807478}, { 0.946600913083283570044599823}, + {-0.946600913083283570044599823}, { 0.322407678801069848384807478}, + { 0.966190003445412555433832961}, { 0.257831102162159005614471295}, + {-0.257831102162159005614471295}, { 0.966190003445412555433832961}, + { 0.500885382611240786241285004}, { 
0.865513624090569082825488358}, + {-0.865513624090569082825488358}, { 0.500885382611240786241285004}, + { 0.793975477554337164895083757}, { 0.607949784967773667243642671}, + {-0.607949784967773667243642671}, { 0.793975477554337164895083757}, + { 0.131540028702883111103387493}, { 0.991310859846115418957349799}, + {-0.991310859846115418957349799}, { 0.131540028702883111103387493}, + { 0.986809401814185476970235952}, { 0.161886393780111837641387995}, + {-0.161886393780111837641387995}, { 0.986809401814185476970235952}, + { 0.583308652937698294392830961}, { 0.812250586585203913049744181}, + {-0.812250586585203913049744181}, { 0.583308652937698294392830961}, + { 0.849741768000852489471268395}, { 0.527199134781901348464274575}, + {-0.527199134781901348464274575}, { 0.849741768000852489471268395}, + { 0.228072083170885739254457379}, { 0.973644249650811925318383912}, + {-0.973644249650811925318383912}, { 0.228072083170885739254457379}, + { 0.936265667170278246576310996}, { 0.351292756085567125601307623}, + {-0.351292756085567125601307623}, { 0.936265667170278246576310996}, + { 0.413638312238434547471944324}, { 0.910441292258067196934095369}, + {-0.910441292258067196934095369}, { 0.413638312238434547471944324}, + { 0.730562769227827561177758850}, { 0.682845546385248068164596123}, + {-0.682845546385248068164596123}, { 0.730562769227827561177758850}, + { 0.033741171851377584833716112}, { 0.999430604555461772019008327}, + {-0.999430604555461772019008327}, { 0.033741171851377584833716112}, + { 0.999204758618363895492950001}, { 0.039872927587739811128578738}, + {-0.039872927587739811128578738}, { 0.999204758618363895492950001}, + { 0.678350043129861486873655042}, { 0.734738878095963464563223604}, + {-0.734738878095963464563223604}, { 0.678350043129861486873655042}, + { 0.907886116487666212038681480}, { 0.419216888363223956433010020}, + {-0.419216888363223956433010020}, { 0.907886116487666212038681480}, + { 0.345541324963989065539191723}, { 0.938403534063108112192420774}, + 
{-0.938403534063108112192420774}, { 0.345541324963989065539191723}, + { 0.972226497078936305708321144}, { 0.234041958583543423191242045}, + {-0.234041958583543423191242045}, { 0.972226497078936305708321144}, + { 0.521975292937154342694258318}, { 0.852960604930363657746588082}, + {-0.852960604930363657746588082}, { 0.521975292937154342694258318}, + { 0.808656181588174991946968128}, { 0.588281548222645304786439813}, + {-0.588281548222645304786439813}, { 0.808656181588174991946968128}, + { 0.155828397654265235743101486}, { 0.987784141644572154230969032}, + {-0.987784141644572154230969032}, { 0.155828397654265235743101486}, + { 0.990485084256457037998682243}, { 0.137620121586486044948441663}, + {-0.137620121586486044948441663}, { 0.990485084256457037998682243}, + { 0.603066598540348201693430617}, { 0.797690840943391108362662755}, + {-0.797690840943391108362662755}, { 0.603066598540348201693430617}, + { 0.862423956111040538690933878}, { 0.506186645345155291048942344}, + {-0.506186645345155291048942344}, { 0.862423956111040538690933878}, + { 0.251897818154216950498106628}, { 0.967753837093475465243391912}, + {-0.967753837093475465243391912}, { 0.251897818154216950498106628}, + { 0.944604837261480265659265493}, { 0.328209843579092526107916817}, + {-0.328209843579092526107916817}, { 0.944604837261480265659265493}, + { 0.435857079922255491032544080}, { 0.900015892016160228714535267}, + {-0.900015892016160228714535267}, { 0.435857079922255491032544080}, + { 0.747100605980180144323078847}, { 0.664710978203344868130324985}, + {-0.664710978203344868130324985}, { 0.747100605980180144323078847}, + { 0.058258264500435759613979782}, { 0.998301544933892840738782163}, + {-0.998301544933892840738782163}, { 0.058258264500435759613979782}, + { 0.996044700901251989887944810}, { 0.088853552582524596561586535}, + {-0.088853552582524596561586535}, { 0.996044700901251989887944810}, + { 0.641481012808583151988739898}, { 0.767138911935820381181694573}, + {-0.767138911935820381181694573}, { 
0.641481012808583151988739898}, + { 0.886222530148880631647990821}, { 0.463259783551860197390719637}, + {-0.463259783551860197390719637}, { 0.886222530148880631647990821}, + { 0.299079826308040476750336973}, { 0.954228095109105629780430732}, + {-0.954228095109105629780430732}, { 0.299079826308040476750336973}, + { 0.959571513081984528335528181}, { 0.281464937925757984095231007}, + {-0.281464937925757984095231007}, { 0.959571513081984528335528181}, + { 0.479493757660153026679839798}, { 0.877545290207261291668470750}, + {-0.877545290207261291668470750}, { 0.479493757660153026679839798}, + { 0.778816512381475953374724325}, { 0.627251815495144113509622565}, + {-0.627251815495144113509622565}, { 0.778816512381475953374724325}, + { 0.107172424956808849175529148}, { 0.994240449453187946358413442}, + {-0.994240449453187946358413442}, { 0.107172424956808849175529148}, + { 0.982539302287441255907040396}, { 0.186055151663446648105438304}, + {-0.186055151663446648105438304}, { 0.982539302287441255907040396}, + { 0.563199344013834115007363772}, { 0.826321062845663480311195452}, + {-0.826321062845663480311195452}, { 0.563199344013834115007363772}, + { 0.836547727223511984524285790}, { 0.547894059173100165608820571}, + {-0.547894059173100165608820571}, { 0.836547727223511984524285790}, + { 0.204108966092816874181696950}, { 0.978948175319062194715480124}, + {-0.978948175319062194715480124}, { 0.204108966092816874181696950}, + { 0.927362525650401087274536959}, { 0.374164062971457997104393020}, + {-0.374164062971457997104393020}, { 0.927362525650401087274536959}, + { 0.391170384302253888687512949}, { 0.920318276709110566440076541}, + {-0.920318276709110566440076541}, { 0.391170384302253888687512949}, + { 0.713584868780793592903125099}, { 0.700568793943248366792866380}, + {-0.700568793943248366792866380}, { 0.713584868780793592903125099}, + { 0.009203754782059819315102378}, { 0.999957644551963866333120920}, + {-0.999957644551963866333120920}, { 0.009203754782059819315102378}, + { 
0.999957644551963866333120920}, { 0.009203754782059819315102378}, + {-0.009203754782059819315102378}, { 0.999957644551963866333120920}, + { 0.700568793943248366792866380}, { 0.713584868780793592903125099}, + {-0.713584868780793592903125099}, { 0.700568793943248366792866380}, + { 0.920318276709110566440076541}, { 0.391170384302253888687512949}, + {-0.391170384302253888687512949}, { 0.920318276709110566440076541}, + { 0.374164062971457997104393020}, { 0.927362525650401087274536959}, + {-0.927362525650401087274536959}, { 0.374164062971457997104393020}, + { 0.978948175319062194715480124}, { 0.204108966092816874181696950}, + {-0.204108966092816874181696950}, { 0.978948175319062194715480124}, + { 0.547894059173100165608820571}, { 0.836547727223511984524285790}, + {-0.836547727223511984524285790}, { 0.547894059173100165608820571}, + { 0.826321062845663480311195452}, { 0.563199344013834115007363772}, + {-0.563199344013834115007363772}, { 0.826321062845663480311195452}, + { 0.186055151663446648105438304}, { 0.982539302287441255907040396}, + {-0.982539302287441255907040396}, { 0.186055151663446648105438304}, + { 0.994240449453187946358413442}, { 0.107172424956808849175529148}, + {-0.107172424956808849175529148}, { 0.994240449453187946358413442}, + { 0.627251815495144113509622565}, { 0.778816512381475953374724325}, + {-0.778816512381475953374724325}, { 0.627251815495144113509622565}, + { 0.877545290207261291668470750}, { 0.479493757660153026679839798}, + {-0.479493757660153026679839798}, { 0.877545290207261291668470750}, + { 0.281464937925757984095231007}, { 0.959571513081984528335528181}, + {-0.959571513081984528335528181}, { 0.281464937925757984095231007}, + { 0.954228095109105629780430732}, { 0.299079826308040476750336973}, + {-0.299079826308040476750336973}, { 0.954228095109105629780430732}, + { 0.463259783551860197390719637}, { 0.886222530148880631647990821}, + {-0.886222530148880631647990821}, { 0.463259783551860197390719637}, + { 0.767138911935820381181694573}, { 
0.641481012808583151988739898}, + {-0.641481012808583151988739898}, { 0.767138911935820381181694573}, + { 0.088853552582524596561586535}, { 0.996044700901251989887944810}, + {-0.996044700901251989887944810}, { 0.088853552582524596561586535}, + { 0.998301544933892840738782163}, { 0.058258264500435759613979782}, + {-0.058258264500435759613979782}, { 0.998301544933892840738782163}, + { 0.664710978203344868130324985}, { 0.747100605980180144323078847}, + {-0.747100605980180144323078847}, { 0.664710978203344868130324985}, + { 0.900015892016160228714535267}, { 0.435857079922255491032544080}, + {-0.435857079922255491032544080}, { 0.900015892016160228714535267}, + { 0.328209843579092526107916817}, { 0.944604837261480265659265493}, + {-0.944604837261480265659265493}, { 0.328209843579092526107916817}, + { 0.967753837093475465243391912}, { 0.251897818154216950498106628}, + {-0.251897818154216950498106628}, { 0.967753837093475465243391912}, + { 0.506186645345155291048942344}, { 0.862423956111040538690933878}, + {-0.862423956111040538690933878}, { 0.506186645345155291048942344}, + { 0.797690840943391108362662755}, { 0.603066598540348201693430617}, + {-0.603066598540348201693430617}, { 0.797690840943391108362662755}, + { 0.137620121586486044948441663}, { 0.990485084256457037998682243}, + {-0.990485084256457037998682243}, { 0.137620121586486044948441663}, + { 0.987784141644572154230969032}, { 0.155828397654265235743101486}, + {-0.155828397654265235743101486}, { 0.987784141644572154230969032}, + { 0.588281548222645304786439813}, { 0.808656181588174991946968128}, + {-0.808656181588174991946968128}, { 0.588281548222645304786439813}, + { 0.852960604930363657746588082}, { 0.521975292937154342694258318}, + {-0.521975292937154342694258318}, { 0.852960604930363657746588082}, + { 0.234041958583543423191242045}, { 0.972226497078936305708321144}, + {-0.972226497078936305708321144}, { 0.234041958583543423191242045}, + { 0.938403534063108112192420774}, { 0.345541324963989065539191723}, + 
{-0.345541324963989065539191723}, { 0.938403534063108112192420774}, + { 0.419216888363223956433010020}, { 0.907886116487666212038681480}, + {-0.907886116487666212038681480}, { 0.419216888363223956433010020}, + { 0.734738878095963464563223604}, { 0.678350043129861486873655042}, + {-0.678350043129861486873655042}, { 0.734738878095963464563223604}, + { 0.039872927587739811128578738}, { 0.999204758618363895492950001}, + {-0.999204758618363895492950001}, { 0.039872927587739811128578738}, + { 0.999430604555461772019008327}, { 0.033741171851377584833716112}, + {-0.033741171851377584833716112}, { 0.999430604555461772019008327}, + { 0.682845546385248068164596123}, { 0.730562769227827561177758850}, + {-0.730562769227827561177758850}, { 0.682845546385248068164596123}, + { 0.910441292258067196934095369}, { 0.413638312238434547471944324}, + {-0.413638312238434547471944324}, { 0.910441292258067196934095369}, + { 0.351292756085567125601307623}, { 0.936265667170278246576310996}, + {-0.936265667170278246576310996}, { 0.351292756085567125601307623}, + { 0.973644249650811925318383912}, { 0.228072083170885739254457379}, + {-0.228072083170885739254457379}, { 0.973644249650811925318383912}, + { 0.527199134781901348464274575}, { 0.849741768000852489471268395}, + {-0.849741768000852489471268395}, { 0.527199134781901348464274575}, + { 0.812250586585203913049744181}, { 0.583308652937698294392830961}, + {-0.583308652937698294392830961}, { 0.812250586585203913049744181}, + { 0.161886393780111837641387995}, { 0.986809401814185476970235952}, + {-0.986809401814185476970235952}, { 0.161886393780111837641387995}, + { 0.991310859846115418957349799}, { 0.131540028702883111103387493}, + {-0.131540028702883111103387493}, { 0.991310859846115418957349799}, + { 0.607949784967773667243642671}, { 0.793975477554337164895083757}, + {-0.793975477554337164895083757}, { 0.607949784967773667243642671}, + { 0.865513624090569082825488358}, { 0.500885382611240786241285004}, + {-0.500885382611240786241285004}, { 
0.865513624090569082825488358}, + { 0.257831102162159005614471295}, { 0.966190003445412555433832961}, + {-0.966190003445412555433832961}, { 0.257831102162159005614471295}, + { 0.946600913083283570044599823}, { 0.322407678801069848384807478}, + {-0.322407678801069848384807478}, { 0.946600913083283570044599823}, + { 0.441371268731716692879988968}, { 0.897324580705418281231391836}, + {-0.897324580705418281231391836}, { 0.441371268731716692879988968}, + { 0.751165131909686411205819422}, { 0.660114342067420478559490747}, + {-0.660114342067420478559490747}, { 0.751165131909686411205819422}, + { 0.064382630929857460819324537}, { 0.997925286198596012623025462}, + {-0.997925286198596012623025462}, { 0.064382630929857460819324537}, + { 0.996571145790554847093566910}, { 0.082740264549375693111987083}, + {-0.082740264549375693111987083}, { 0.996571145790554847093566910}, + { 0.646176012983316364832802220}, { 0.763188417263381271704838297}, + {-0.763188417263381271704838297}, { 0.646176012983316364832802220}, + { 0.889048355854664562540777729}, { 0.457813303598877221904961155}, + {-0.457813303598877221904961155}, { 0.889048355854664562540777729}, + { 0.304929229735402406490728633}, { 0.952375012719765858529893608}, + {-0.952375012719765858529893608}, { 0.304929229735402406490728633}, + { 0.961280485811320641748659653}, { 0.275571819310958163076425168}, + {-0.275571819310958163076425168}, { 0.961280485811320641748659653}, + { 0.484869248000791101822951699}, { 0.874586652278176112634431897}, + {-0.874586652278176112634431897}, { 0.484869248000791101822951699}, + { 0.782650596166575738458949301}, { 0.622461279374149972519166721}, + {-0.622461279374149972519166721}, { 0.782650596166575738458949301}, + { 0.113270952177564349018228733}, { 0.993564135520595333782021697}, + {-0.993564135520595333782021697}, { 0.113270952177564349018228733}, + { 0.983662419211730274396237776}, { 0.180022901405699522679906590}, + {-0.180022901405699522679906590}, { 0.983662419211730274396237776}, + { 
0.568258952670131549790548489}, { 0.822849781375826332046780034}, + {-0.822849781375826332046780034}, { 0.568258952670131549790548489}, + { 0.839893794195999504583383987}, { 0.542750784864515906586768661}, + {-0.542750784864515906586768661}, { 0.839893794195999504583383987}, + { 0.210111836880469621717489972}, { 0.977677357824509979943404762}, + {-0.977677357824509979943404762}, { 0.210111836880469621717489972}, + { 0.929640895843181265457918066}, { 0.368466829953372331712746222}, + {-0.368466829953372331712746222}, { 0.929640895843181265457918066}, + { 0.396809987416710328595290911}, { 0.917900775621390457642276297}, + {-0.917900775621390457642276297}, { 0.396809987416710328595290911}, + { 0.717870045055731736211325329}, { 0.696177131491462944788582591}, + {-0.696177131491462944788582591}, { 0.717870045055731736211325329}, + { 0.015339206284988101044151868}, { 0.999882347454212525633049627}, + {-0.999882347454212525633049627}, { 0.015339206284988101044151868}, + { 0.999769405351215321657617036}, { 0.021474080275469507418374898}, + {-0.021474080275469507418374898}, { 0.999769405351215321657617036}, + { 0.691759258364157774906734132}, { 0.722128193929215321243607198}, + {-0.722128193929215321243607198}, { 0.691759258364157774906734132}, + { 0.915448716088267819566431292}, { 0.402434650859418441082533934}, + {-0.402434650859418441082533934}, { 0.915448716088267819566431292}, + { 0.362755724367397216204854462}, { 0.931884265581668106718557199}, + {-0.931884265581668106718557199}, { 0.362755724367397216204854462}, + { 0.976369731330021149312732194}, { 0.216106797076219509948385131}, + {-0.216106797076219509948385131}, { 0.976369731330021149312732194}, + { 0.537587076295645482502214932}, { 0.843208239641845437161743865}, + {-0.843208239641845437161743865}, { 0.537587076295645482502214932}, + { 0.819347520076796960824689637}, { 0.573297166698042212820171239}, + {-0.573297166698042212820171239}, { 0.819347520076796960824689637}, + { 0.173983873387463827950700807}, { 
0.984748501801904218556553176}, + {-0.984748501801904218556553176}, { 0.173983873387463827950700807}, + { 0.992850414459865090793563344}, { 0.119365214810991364593637790}, + {-0.119365214810991364593637790}, { 0.992850414459865090793563344}, + { 0.617647307937803932403979402}, { 0.786455213599085757522319464}, + {-0.786455213599085757522319464}, { 0.617647307937803932403979402}, + { 0.871595086655951034842481435}, { 0.490226483288291154229598449}, + {-0.490226483288291154229598449}, { 0.871595086655951034842481435}, + { 0.269668325572915106525464462}, { 0.962953266873683886347921481}, + {-0.962953266873683886347921481}, { 0.269668325572915106525464462}, + { 0.950486073949481721759926101}, { 0.310767152749611495835997250}, + {-0.310767152749611495835997250}, { 0.950486073949481721759926101}, + { 0.452349587233770874133026703}, { 0.891840709392342727796478697}, + {-0.891840709392342727796478697}, { 0.452349587233770874133026703}, + { 0.759209188978388033485525443}, { 0.650846684996380915068975573}, + {-0.650846684996380915068975573}, { 0.759209188978388033485525443}, + { 0.076623861392031492278332463}, { 0.997060070339482978987989949}, + {-0.997060070339482978987989949}, { 0.076623861392031492278332463}, + { 0.997511456140303459699448390}, { 0.070504573389613863027351471}, + {-0.070504573389613863027351471}, { 0.997511456140303459699448390}, + { 0.655492852999615385312679701}, { 0.755201376896536527598710756}, + {-0.755201376896536527598710756}, { 0.655492852999615385312679701}, + { 0.894599485631382678433072126}, { 0.446868840162374195353044389}, + {-0.446868840162374195353044389}, { 0.894599485631382678433072126}, + { 0.316593375556165867243047035}, { 0.948561349915730288158494826}, + {-0.948561349915730288158494826}, { 0.316593375556165867243047035}, + { 0.964589793289812723836432159}, { 0.263754678974831383611349322}, + {-0.263754678974831383611349322}, { 0.964589793289812723836432159}, + { 0.495565261825772531150266670}, { 0.868570705971340895340449876}, + 
{-0.868570705971340895340449876}, { 0.495565261825772531150266670}, + { 0.790230221437310055030217152}, { 0.612810082429409703935211936}, + {-0.612810082429409703935211936}, { 0.790230221437310055030217152}, + { 0.125454983411546238542336453}, { 0.992099313142191757112085445}, + {-0.992099313142191757112085445}, { 0.125454983411546238542336453}, + { 0.985797509167567424700995000}, { 0.167938294974731178054745536}, + {-0.167938294974731178054745536}, { 0.985797509167567424700995000}, + { 0.578313796411655563342245019}, { 0.815814410806733789010772660}, + {-0.815814410806733789010772660}, { 0.578313796411655563342245019}, + { 0.846490938774052078300544488}, { 0.532403127877197971442805218}, + {-0.532403127877197971442805218}, { 0.846490938774052078300544488}, + { 0.222093620973203534094094721}, { 0.975025345066994146844913468}, + {-0.975025345066994146844913468}, { 0.222093620973203534094094721}, + { 0.934092550404258914729877883}, { 0.357030961233430032614954036}, + {-0.357030961233430032614954036}, { 0.934092550404258914729877883}, + { 0.408044162864978680820747499}, { 0.912962190428398164628018233}, + {-0.912962190428398164628018233}, { 0.408044162864978680820747499}, + { 0.726359155084345976817494315}, { 0.687315340891759108199186948}, + {-0.687315340891759108199186948}, { 0.726359155084345976817494315}, + { 0.027608145778965741612354872}, { 0.999618822495178597116830637}, + {-0.999618822495178597116830637}, { 0.027608145778965741612354872}, + { 0.998941293186856850633930266}, { 0.046003182130914628814301788}, + {-0.046003182130914628814301788}, { 0.998941293186856850633930266}, + { 0.673829000378756060917568372}, { 0.738887324460615147933116508}, + {-0.738887324460615147933116508}, { 0.673829000378756060917568372}, + { 0.905296759318118774354048329}, { 0.424779681209108833357226189}, + {-0.424779681209108833357226189}, { 0.905296759318118774354048329}, + { 0.339776884406826857828825803}, { 0.940506070593268323787291309}, + {-0.940506070593268323787291309}, { 
0.339776884406826857828825803}, + { 0.970772140728950302138169611}, { 0.240003022448741486568922365}, + {-0.240003022448741486568922365}, { 0.970772140728950302138169611}, + { 0.516731799017649881508753876}, { 0.856147328375194481019630732}, + {-0.856147328375194481019630732}, { 0.516731799017649881508753876}, + { 0.805031331142963597922659282}, { 0.593232295039799808047809426}, + {-0.593232295039799808047809426}, { 0.805031331142963597922659282}, + { 0.149764534677321517229695737}, { 0.988721691960323767604516485}, + {-0.988721691960323767604516485}, { 0.149764534677321517229695737}, + { 0.989622017463200834623694454}, { 0.143695033150294454819773349}, + {-0.143695033150294454819773349}, { 0.989622017463200834623694454}, + { 0.598160706996342311724958652}, { 0.801376171723140219430247777}, + {-0.801376171723140219430247777}, { 0.598160706996342311724958652}, + { 0.859301818357008404783582139}, { 0.511468850437970399504391001}, + {-0.511468850437970399504391001}, { 0.859301818357008404783582139}, + { 0.245955050335794611599924709}, { 0.969281235356548486048290738}, + {-0.969281235356548486048290738}, { 0.245955050335794611599924709}, + { 0.942573197601446879280758735}, { 0.333999651442009404650865481}, + {-0.333999651442009404650865481}, { 0.942573197601446879280758735}, + { 0.430326481340082633908199031}, { 0.902673318237258806751502391}, + {-0.902673318237258806751502391}, { 0.430326481340082633908199031}, + { 0.743007952135121693517362293}, { 0.669282588346636065720696366}, + {-0.669282588346636065720696366}, { 0.743007952135121693517362293}, + { 0.052131704680283321236358216}, { 0.998640218180265222418199049}, + {-0.998640218180265222418199049}, { 0.052131704680283321236358216}, + { 0.995480755491926941769171600}, { 0.094963495329638998938034312}, + {-0.094963495329638998938034312}, { 0.995480755491926941769171600}, + { 0.636761861236284230413943435}, { 0.771060524261813773200605759}, + {-0.771060524261813773200605759}, { 0.636761861236284230413943435}, + { 
0.883363338665731594736308015}, { 0.468688822035827933697617870}, + {-0.468688822035827933697617870}, { 0.883363338665731594736308015}, + { 0.293219162694258650606608599}, { 0.956045251349996443270479823}, + {-0.956045251349996443270479823}, { 0.293219162694258650606608599}, + { 0.957826413027532890321037029}, { 0.287347459544729526477331841}, + {-0.287347459544729526477331841}, { 0.957826413027532890321037029}, + { 0.474100214650550014398580015}, { 0.880470889052160770806542929}, + {-0.880470889052160770806542929}, { 0.474100214650550014398580015}, + { 0.774953106594873878359129282}, { 0.632018735939809021909403706}, + {-0.632018735939809021909403706}, { 0.774953106594873878359129282}, + { 0.101069862754827824987887585}, { 0.994879330794805620591166107}, + {-0.994879330794805620591166107}, { 0.101069862754827824987887585}, + { 0.981379193313754574318224190}, { 0.192080397049892441679288205}, + {-0.192080397049892441679288205}, { 0.981379193313754574318224190}, + { 0.558118531220556115693702964}, { 0.829761233794523042469023765}, + {-0.829761233794523042469023765}, { 0.558118531220556115693702964}, + { 0.833170164701913186439915922}, { 0.553016705580027531764226988}, + {-0.553016705580027531764226988}, { 0.833170164701913186439915922}, + { 0.198098410717953586179324918}, { 0.980182135968117392690210009}, + {-0.980182135968117392690210009}, { 0.198098410717953586179324918}, + { 0.925049240782677590302371869}, { 0.379847208924051170576281147}, + {-0.379847208924051170576281147}, { 0.925049240782677590302371869}, + { 0.385516053843918864075607949}, { 0.922701128333878570437264227}, + {-0.922701128333878570437264227}, { 0.385516053843918864075607949}, + { 0.709272826438865651316533772}, { 0.704934080375904908852523758}, + {-0.704934080375904908852523758}, { 0.709272826438865651316533772}, + { 0.003067956762965976270145365}, { 0.999995293809576171511580126}, + {-0.999995293809576171511580126}, { 0.003067956762965976270145365} +}; + +const fpr fpr_p2_tab[] = { + { 
2.00000000000 }, + { 1.00000000000 }, + { 0.50000000000 }, + { 0.25000000000 }, + { 0.12500000000 }, + { 0.06250000000 }, + { 0.03125000000 }, + { 0.01562500000 }, + { 0.00781250000 }, + { 0.00390625000 }, + { 0.00195312500 } +}; diff --git a/crypto_sign/falcon/falcon-512/avx2/fpr.h b/crypto_sign/falcon/falcon-512/avx2/fpr.h new file mode 100644 index 00000000..5c7df25c --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/fpr.h @@ -0,0 +1,349 @@ +#ifndef PQCLEAN_FALCON512_AVX2_FPR_H +#define PQCLEAN_FALCON512_AVX2_FPR_H + +/* + * Floating-point operations. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ====================================================================== */ + +#include +#include +/* + * FMADD(a, b, c) evaluates a*b + c and FMSUB(a, b, c) evaluates a*b - c, + * lane-wise on __m256d operands. Both are spelled as a separate multiply + * followed by an add/subtract; no FMA intrinsic is used here. + */ +#define FMADD(a, b, c) _mm256_add_pd(_mm256_mul_pd(a, b), c) +#define FMSUB(a, b, c) _mm256_sub_pd(_mm256_mul_pd(a, b), c) + +/* + * We wrap the native 'double' type into a structure so that the C compiler + * complains if we inadvertently use raw arithmetic operators on the 'fpr' + * type instead of using the inline functions below. This should have no + * extra runtime cost, since all the functions below are 'inline'. + * + * FPR() wraps a raw double into an fpr; fpr_of() converts a signed + * 64-bit integer into an fpr through a plain (double) cast. + */ +typedef struct { + double v; +} fpr; + +static inline fpr +FPR(double v) { + fpr x; + + x.v = v; + return x; +} + +static inline fpr +fpr_of(int64_t i) { + return FPR((double)i); +} + +static const fpr fpr_q = { 12289.0 }; +static const fpr fpr_inverse_of_q = { 1.0 / 12289.0 }; +static const fpr fpr_inv_2sqrsigma0 = { .150865048875372721532312163019 }; +static const fpr fpr_inv_sigma = { .005819826392951607426919370871 }; +static const fpr fpr_sigma_min_9 = { 1.291500756233514568549480827642 }; +static const fpr fpr_sigma_min_10 = { 1.311734375905083682667395805765 }; +static const fpr fpr_log2 = { 0.69314718055994530941723212146 }; +static const fpr fpr_inv_log2 = { 1.4426950408889634073599246810 }; +static const fpr fpr_bnorm_max = { 16822.4121 }; +static const fpr fpr_zero = { 0.0 }; +static const fpr fpr_one = { 1.0 }; +static const fpr fpr_two = { 2.0 }; +static const fpr fpr_onehalf = { 0.5 }; +static const fpr fpr_invsqrt2 = { 0.707106781186547524400844362105 }; +static const fpr fpr_invsqrt8 = { 0.353553390593273762200422181052 }; +static const fpr fpr_ptwo31 = { 2147483648.0 }; +static const fpr fpr_ptwo31m1 = { 2147483647.0 }; +static const fpr fpr_mtwo31m1 = { -2147483647.0 }; +static const fpr fpr_ptwo63m1 = { 9223372036854775807.0 }; +static const fpr fpr_mtwo63m1 = { -9223372036854775807.0 }; +static const fpr
fpr_ptwo63 = { 9223372036854775808.0 }; + +static inline int64_t +fpr_rint(fpr x) { + /* + * Round x to the nearest integer (ties to even) and return it + * as an int64_t. + * + * We do not want to use llrint() since it might not be + * constant-time. + * + * Suppose that x >= 0. If x >= 2^52, then it is already an + * integer. Otherwise, if x < 2^52, then computing x+2^52 will + * yield a value that will be rounded to the nearest integer + * with exactly the right rules (round-to-nearest-even). + * + * In order to have constant-time processing, we must do the + * computation for both x >= 0 and x < 0 cases, and use a + * cast to an integer to access the sign and select the proper + * value. Such casts also allow us to find out if |x| < 2^52. + */ + int64_t sx, tx, rp, rn, m; + uint32_t ub; + + sx = (int64_t)(x.v - 1.0); + tx = (int64_t)x.v; + rp = (int64_t)(x.v + 4503599627370496.0) - 4503599627370496; + rn = (int64_t)(x.v - 4503599627370496.0) + 4503599627370496; + + /* + * If tx >= 2^52 or tx < -2^52, then result is tx. + * Otherwise, if sx >= 0, then result is rp. + * Otherwise, result is rn. We use the fact that when x is + * close to 0 (|x| <= 0.25) then both rp and rn are correct; + * and if x is not close to 0, then trunc(x-1.0) yields the + * appropriate sign. + */ + + /* + * Clamp rp to zero if sx < 0. + * Clamp rn to zero if sx >= 0. + * (The mask comes from sx = trunc(x - 1.0), not from tx.) + */ + m = sx >> 63; + rn &= m; + rp &= ~m; + + /* + * Get the 12 upper bits of tx; if they are not all zeros or + * all ones, then tx >= 2^52 or tx < -2^52, and we clamp both + * rp and rn to zero. Otherwise, we clamp tx to zero. + */ + ub = (uint32_t)((uint64_t)tx >> 52); + m = -(int64_t)((((ub + 1) & 0xFFF) - 2) >> 31); + rp &= m; + rn &= m; + tx &= ~m; + + /* + * Only one of tx, rn or rp (at most) can be non-zero at this + * point. + */ + return tx | rn | rp; +} + +static inline int64_t +fpr_floor(fpr x) { + int64_t r; + + /* + * The cast performs a trunc() (rounding toward 0) and thus is + * wrong by 1 for most negative values.
The correction below is + * constant-time as long as the compiler turns the + * floating-point conversion result into a 0/1 integer without a + * conditional branch or another non-constant-time construction. + * This should hold on all modern architectures with an FPU (and + * if it is false on a given arch, then chances are that the FPU + * itself is not constant-time, making the point moot). + */ + r = (int64_t)x.v; + return r - (x.v < (double)r); +} + +static inline int64_t +fpr_trunc(fpr x) { + return (int64_t)x.v; +} + +static inline fpr +fpr_add(fpr x, fpr y) { + return FPR(x.v + y.v); +} + +static inline fpr +fpr_sub(fpr x, fpr y) { + return FPR(x.v - y.v); +} + +static inline fpr +fpr_neg(fpr x) { + return FPR(-x.v); +} + +static inline fpr +fpr_half(fpr x) { + return FPR(x.v * 0.5); +} + +static inline fpr +fpr_double(fpr x) { + return FPR(x.v + x.v); +} + +static inline fpr +fpr_mul(fpr x, fpr y) { + return FPR(x.v * y.v); +} + +static inline fpr +fpr_sqr(fpr x) { + return FPR(x.v * x.v); +} + +static inline fpr +fpr_inv(fpr x) { + return FPR(1.0 / x.v); +} + +static inline fpr +fpr_div(fpr x, fpr y) { + return FPR(x.v / y.v); +} + +static inline void +fpr_sqrt_avx2(double *t) { + __m128d x; + /* + * Replace *t with its square root: the value is broadcast into + * both lanes of an SSE2 register, square-rooted with + * _mm_sqrt_pd(), and the low lane is stored back into *t. + */ + x = _mm_load1_pd(t); + x = _mm_sqrt_pd(x); + _mm_storel_pd(t, x); +} + +static inline fpr +fpr_sqrt(fpr x) { + /* + * We prefer not to have a dependency on libm when it can be + * avoided. On x86, calling the sqrt() libm function inlines + * the relevant opcode (fsqrt or sqrtsd, depending on whether + * the 387 FPU or SSE2 is used for floating-point operations) + * but then makes an optional call to the library function + * for proper error handling, in case the operand is negative. + * + * To avoid this dependency, we use intrinsics or inline assembly + * on recognized platforms: + * + * - If AVX2 is explicitly enabled, then we use SSE2 intrinsics. + * + * - On GCC/Clang with SSE maths, we use SSE2 intrinsics.
+ * + * - On GCC/Clang on i386, or MSVC on i386, we use inline assembly + * to call the 387 FPU fsqrt opcode. + * + * - On GCC/Clang/XLC on PowerPC, we use inline assembly to call + * the fsqrt opcode (Clang needs a special hack). + * + * - On GCC/Clang on ARM with hardware floating-point, we use + * inline assembly to call the vsqrt.f64 opcode. Due to a + * complex ecosystem of compilers and assembly syntaxes, we + * have to call it "fsqrt" or "fsqrtd", depending on case. + * + * If the platform is not recognized, a call to the system + * library function sqrt() is performed. On some compilers, this + * may actually inline the relevant opcode, and call the library + * function only when the input is invalid (e.g. negative); + * Falcon never actually calls sqrt() on a negative value, but + * the dependency on libm will still be there. + * + * In this AVX2 build only the first case applies: the body + * below unconditionally calls fpr_sqrt_avx2(). + */ + + fpr_sqrt_avx2(&x.v); + return x; +} + +static inline int +fpr_lt(fpr x, fpr y) { + return x.v < y.v; +} + +static inline uint64_t +fpr_expm_p63(fpr x, fpr ccs) { + /* + * Compute ccs * exp(-x) * 2^63 and return it truncated to an + * integer (see the final conversion at the end of the function). + * + * Polynomial approximation of exp(-x) is taken from FACCT: + * https://eprint.iacr.org/2018/1234 + * Specifically, values are extracted from the implementation + * referenced from the FACCT article, and available at: + * https://github.com/raykzhao/gaussian + * Tests over more than 24 billion random inputs in the + * 0..log(2) range have never shown a deviation larger than + * 2^(-50) from the true mathematical value. + */ + + + /* + * AVX2 implementation uses more operations than Horner's method, + * but with a lower expression tree depth. This helps because + * additions and multiplications have a latency of 4 cycles on + * a Skylake, but the CPU can issue two of them per cycle.
+ */ + + static const union { + double d[12]; + __m256d v[3]; + } c = { + { + 0.999999999999994892974086724280, + 0.500000000000019206858326015208, + 0.166666666666984014666397229121, + 0.041666666666110491190622155955, + 0.008333333327800835146903501993, + 0.001388888894063186997887560103, + 0.000198412739277311890541063977, + 0.000024801566833585381209939524, + 0.000002755586350219122514855659, + 0.000000275607356160477811864927, + 0.000000025299506379442070029551, + 0.000000002073772366009083061987 + } + }; + + double d1, d2, d4, d8, y; + __m256d d14, d58, d9c; + /* + * With d1 = -x, build the vectors of powers d14 = (d1^1..d1^4), + * d58 = (d1^5..d1^8) and d9c = (d1^9..d1^12), multiply each lane + * by the matching coefficient of c.d[], and add the twelve terms + * together; y = 1 + sum is the polynomial value, which is then + * scaled by ccs. + */ + d1 = -x.v; + d2 = d1 * d1; + d4 = d2 * d2; + d8 = d4 * d4; + d14 = _mm256_set_pd(d4, d2 * d1, d2, d1); + d58 = _mm256_mul_pd(d14, _mm256_set1_pd(d4)); + d9c = _mm256_mul_pd(d14, _mm256_set1_pd(d8)); + d14 = _mm256_mul_pd(d14, _mm256_loadu_pd(&c.d[0])); + d58 = FMADD(d58, _mm256_loadu_pd(&c.d[4]), d14); + d9c = FMADD(d9c, _mm256_loadu_pd(&c.d[8]), d58); + d9c = _mm256_hadd_pd(d9c, d9c); + y = 1.0 + _mm_cvtsd_f64(_mm256_castpd256_pd128(d9c)) // _mm256_cvtsd_f64(d9c) + + _mm_cvtsd_f64(_mm256_extractf128_pd(d9c, 1)); + y *= ccs.v; + + /* + * Final conversion goes through int64_t first, because that's what + * the underlying opcode (vcvttsd2si) will do, and we know that the + * result will fit, since x >= 0 and ccs < 1. If we did the + * conversion directly to uint64_t, then the compiler would add some + * extra code to cover the case of a source value of 2^63 or more, + * and though the alternate path would never be exercised, the + * extra comparison would cost us some cycles.
+ */ + return (uint64_t)(int64_t)(y * fpr_ptwo63.v); + +} + +#define fpr_gm_tab PQCLEAN_FALCON512_AVX2_fpr_gm_tab +extern const fpr fpr_gm_tab[]; + +#define fpr_p2_tab PQCLEAN_FALCON512_AVX2_fpr_p2_tab +extern const fpr fpr_p2_tab[]; + +/* ====================================================================== */ +#endif diff --git a/crypto_sign/falcon/falcon-512/avx2/inner.h b/crypto_sign/falcon/falcon-512/avx2/inner.h new file mode 100644 index 00000000..22c34564 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/inner.h @@ -0,0 +1,826 @@ +#ifndef PQCLEAN_FALCON512_AVX2_INNER_H +#define PQCLEAN_FALCON512_AVX2_INNER_H + + +/* + * Internal functions for Falcon. This is not the API intended to be + * used by applications; instead, this internal API provides all the + * primitives on which wrappers build to provide external APIs. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +/* + * IMPORTANT API RULES + * ------------------- + * + * This API has some non-trivial usage rules: + * + * + * - All public functions (i.e. the non-static ones) must be referenced + * with the PQCLEAN_FALCON512_AVX2_ macro (e.g. PQCLEAN_FALCON512_AVX2_verify_raw for the verify_raw() + * function). That macro adds a prefix to the name, which is + * configurable with the FALCON_PREFIX macro. This allows compiling + * the code into a specific "namespace" and potentially including + * several versions of this code into a single application (e.g. to + * have both an AVX2 and a non-AVX2 variant and select the one to use at + * runtime based on availability of AVX2 opcodes). + * + * - Functions that need temporary buffers expect them as a final + * tmp[] array of type uint8_t*, with a size which is documented for + * each function. However, most have some alignment requirements, + * because they will use the array to store 16-bit, 32-bit or 64-bit + * values (e.g. uint64_t or double). The caller must ensure proper + * alignment. What happens on unaligned access depends on the + * underlying architecture, ranging from a slight time penalty + * to immediate termination of the process. + * + * - Some functions rely on specific rounding rules and precision for + * floating-point numbers. On some systems (in particular 32-bit x86 + * with the 387 FPU), this requires setting a hardware control + * word. 
The caller MUST use set_fpu_cw() to ensure proper precision: + * + * oldcw = set_fpu_cw(2); + * PQCLEAN_FALCON512_AVX2_sign_dyn(...); + * set_fpu_cw(oldcw); + * + * On systems where the native floating-point precision is already + * proper, or integer-based emulation is used, the set_fpu_cw() + * function does nothing, so it can be called systematically. + */ +#include "fips202.h" +#include "fpr.h" +#include +#include +#include + +/* + * Some computations with floating-point elements, in particular + * rounding to the nearest integer, rely on operations using _exactly_ + * the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit + * x86, the 387 FPU may be used (depending on the target OS) and, in + * that case, may use more precision bits (i.e. 64 bits, for an 80-bit + * total type length); to prevent miscomputations, we define an explicit + * function that modifies the precision in the FPU control word. + * + * set_fpu_cw() sets the precision to the provided value, and returns + * the previously set precision; callers are supposed to restore the + * previous precision on exit. The correct (52-bit) precision is + * configured with the value "2". On unsupported compilers, or on + * targets other than 32-bit x86, or when the native 'double' type is + * not used, the set_fpu_cw() function does nothing at all. + */ +static inline unsigned +set_fpu_cw(unsigned x) { + return x; +} + + + + +/* ==================================================================== */ +/* + * SHAKE256 implementation (shake.c). + * + * API is defined to be easily replaced with the fips202.h API defined + * as part of PQClean. 
+ */ + + + +#define inner_shake256_context shake256incctx +#define inner_shake256_init(sc) shake256_inc_init(sc) +#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) +#define inner_shake256_flip(sc) shake256_inc_finalize(sc) +#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) +#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc) + + +/* ==================================================================== */ +/* + * Encoding/decoding functions (codec.c). + * + * Encoding functions take as parameters an output buffer (out) with + * a given maximum length (max_out_len); returned value is the actual + * number of bytes which have been written. If the output buffer is + * not large enough, then 0 is returned (some bytes may have been + * written to the buffer). If 'out' is NULL, then 'max_out_len' is + * ignored; instead, the function computes and returns the actual + * required output length (in bytes). + * + * Decoding functions take as parameters an input buffer (in) with + * its maximum length (max_in_len); returned value is the actual number + * of bytes that have been read from the buffer. If the provided length + * is too short, then 0 is returned. + * + * Values to encode or decode are vectors of integers, with N = 2^logn + * elements. + * + * Three encoding formats are defined: + * + * - modq: sequence of values modulo 12289, each encoded over exactly + * 14 bits. The encoder and decoder verify that integers are within + * the valid range (0..12288). Values are arrays of uint16. + * + * - trim: sequence of signed integers, a specified number of bits + * each. The number of bits is provided as parameter and includes + * the sign bit. Each integer x must be such that |x| < 2^(bits-1) + * (which means that the -2^(bits-1) value is forbidden); encode and + * decode functions check that property. 
Values are arrays of + * int16_t or int8_t, corresponding to names 'trim_i16' and + * 'trim_i8', respectively. + * + * - comp: variable-length encoding for signed integers; each integer + * uses a minimum of 9 bits, possibly more. This is normally used + * only for signatures. + * + */ + +size_t PQCLEAN_FALCON512_AVX2_modq_encode(void *out, size_t max_out_len, + const uint16_t *x, unsigned logn); +size_t PQCLEAN_FALCON512_AVX2_trim_i16_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON512_AVX2_trim_i8_encode(void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON512_AVX2_comp_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn); + +size_t PQCLEAN_FALCON512_AVX2_modq_decode(uint16_t *x, unsigned logn, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON512_AVX2_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON512_AVX2_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON512_AVX2_comp_decode(int16_t *x, unsigned logn, + const void *in, size_t max_in_len); + +/* + * Number of bits for key elements, indexed by logn (1 to 10). This + * is at most 8 bits for all degrees, but some degrees may have shorter + * elements. + */ +extern const uint8_t PQCLEAN_FALCON512_AVX2_max_fg_bits[]; +extern const uint8_t PQCLEAN_FALCON512_AVX2_max_FG_bits[]; + +/* + * Maximum size, in bits, of elements in a signature, indexed by logn + * (1 to 10). The size includes the sign bit. + */ +extern const uint8_t PQCLEAN_FALCON512_AVX2_max_sig_bits[]; + +/* ==================================================================== */ +/* + * Support functions used for both signature generation and signature + * verification (common.c). + */ + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. 
This is the non-constant-time version, which may leak enough + * information to serve as a stop condition on a brute force attack on + * the hashed message (provided that the nonce value is known). + */ +void PQCLEAN_FALCON512_AVX2_hash_to_point_vartime(inner_shake256_context *sc, + uint16_t *x, unsigned logn); + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. The temporary buffer (tmp) must have room for 2*2^logn bytes. + * This function is constant-time but is typically more expensive than + * PQCLEAN_FALCON512_AVX2_hash_to_point_vartime(). + * + * tmp[] must have 16-bit alignment. + */ +void PQCLEAN_FALCON512_AVX2_hash_to_point_ct(inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. This compares the appropriate norm of the + * vector with the acceptance bound. Returned value is 1 on success + * (vector is short enough to be acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON512_AVX2_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. Instead of the first half s1, this + * function receives the "saturated squared norm" of s1, i.e. the + * sum of the squares of the coordinates of s1 (saturated at 2^32-1 + * if the sum exceeds 2^31-1). + * + * Returned value is 1 on success (vector is short enough to be + * acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON512_AVX2_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); + +/* ==================================================================== */ +/* + * Signature verification functions (vrfy.c). + */ + +/* + * Convert a public key to NTT + Montgomery format. Conversion is done + * in place. 
+ */ +void PQCLEAN_FALCON512_AVX2_to_ntt_monty(uint16_t *h, unsigned logn); + +/* + * Internal signature verification code: + * c0[] contains the hashed nonce+message + * s2[] is the decoded signature + * h[] contains the public key, in NTT + Montgomery format + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp); + +/* + * Compute the public key h[], given the private key elements f[] and + * g[]. This computes h = g/f mod phi mod q, where phi is the polynomial + * modulus. This function returns 1 on success, 0 on error (an error is + * reported if f is not invertible mod phi mod q). + * + * The tmp[] array must have room for at least 2*2^logn elements. + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_AVX2_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); + +/* + * Recompute the fourth private key element. Private key consists of + * four polynomials with small coefficients f, g, F and G, which are + * such that fG - gF = q mod phi; furthermore, f is invertible modulo + * phi and modulo q. This function recomputes G from f, g and F. + * + * The tmp[] array must have room for at least 4*2^logn bytes. + * + * Returned value is 1 on success, 0 on error (f not invertible). + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_AVX2_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp); + +/* + * Test whether a given polynomial is invertible modulo phi and q. + * Polynomial coefficients are small integers. + * + * tmp[] must have 16-bit alignment. 
+ */ +int PQCLEAN_FALCON512_AVX2_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp); + +/* + * Count the number of elements of value zero in the NTT representation + * of the given polynomial: this is the number of primitive 2n-th roots + * of unity (modulo q = 12289) that are roots of the provided polynomial + * (taken modulo q). + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp); + +/* + * Internal signature verification with public key recovery: + * h[] receives the public key (NOT in NTT/Montgomery format) + * c0[] contains the hashed nonce+message + * s1[] is the first signature half + * s2[] is the second signature half + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. Success is returned if + * the signature is a short enough vector; in that case, the public + * key has been written to h[]. However, the caller must still + * verify that h[] is the correct value (e.g. with regards to a known + * hash of the public key). + * + * h[] may not overlap with any of the other arrays. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_AVX2_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Implementation of floating-point real numbers (fpr.h, fpr.c). + */ + +/* + * Real numbers are implemented by an extra header file, included below. + * This is meant to support pluggable implementations. The default + * implementation relies on the C type 'double'. 
+ * + * The included file must define the following types, functions and + * constants: + * + * fpr + * type for a real number + * + * fpr fpr_of(int64_t i) + * cast an integer into a real number; source must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_scaled(int64_t i, int sc) + * compute i*2^sc as a real number; source 'i' must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_ldexp(fpr x, int e) + * compute x*2^e + * + * int64_t fpr_rint(fpr x) + * round x to the nearest integer; x must be in the -(2^63-1) + * to +(2^63-1) range + * + * int64_t fpr_trunc(fpr x) + * round to an integer; this rounds towards zero; value must + * be in the -(2^63-1) to +(2^63-1) range + * + * fpr fpr_add(fpr x, fpr y) + * compute x + y + * + * fpr fpr_sub(fpr x, fpr y) + * compute x - y + * + * fpr fpr_neg(fpr x) + * compute -x + * + * fpr fpr_half(fpr x) + * compute x/2 + * + * fpr fpr_double(fpr x) + * compute x*2 + * + * fpr fpr_mul(fpr x, fpr y) + * compute x * y + * + * fpr fpr_sqr(fpr x) + * compute x * x + * + * fpr fpr_inv(fpr x) + * compute 1/x + * + * fpr fpr_div(fpr x, fpr y) + * compute x/y + * + * fpr fpr_sqrt(fpr x) + * compute the square root of x + * + * int fpr_lt(fpr x, fpr y) + * return 1 if x < y, 0 otherwise + * + * uint64_t fpr_expm_p63(fpr x, fpr ccs) + * return ccs*exp(-x), assuming 0 <= x < log(2) and ccs < 1. + * Returned value is scaled to 63 bits (i.e. 2^63*ccs*exp(-x), + * rounded to the nearest integer). Computation should have a + * precision of at least 45 bits. 
+ * + * const fpr fpr_gm_tab[] + * array of constants for FFT / iFFT + * + * const fpr fpr_p2_tab[] + * precomputed powers of 2 (by index, 0 to 10) + * + * Constants of type 'fpr': + * + * fpr fpr_q 12289 + * fpr fpr_inverse_of_q 1/12289 + * fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) + * fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) + * fpr fpr_sigma_min_9 1.291500756233514568549480827642 + * fpr fpr_sigma_min_10 1.311734375905083682667395805765 + * fpr fpr_log2 log(2) + * fpr fpr_inv_log2 1/log(2) + * fpr fpr_bnorm_max 16822.4121 + * fpr fpr_zero 0 + * fpr fpr_one 1 + * fpr fpr_two 2 + * fpr fpr_onehalf 0.5 + * fpr fpr_ptwo31 2^31 + * fpr fpr_ptwo31m1 2^31-1 + * fpr fpr_mtwo31m1 -(2^31-1) + * fpr fpr_ptwo63m1 2^63-1 + * fpr fpr_mtwo63m1 -(2^63-1) + * fpr fpr_ptwo63 2^63 + */ + +/* ==================================================================== */ +/* + * RNG (rng.c). + * + * A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 + * context (flipped) and is used for bulk pseudorandom generation. + * A system-dependent seed generator is also provided. + */ + +/* + * Obtain a random seed from the system RNG. + * + * Returned value is 1 on success, 0 on error. + */ +int PQCLEAN_FALCON512_AVX2_get_seed(void *seed, size_t seed_len); + +/* + * Structure for a PRNG. This includes a large buffer so that values + * get generated in advance. The 'state' is used to keep the current + * PRNG algorithm state (contents depend on the selected algorithm). + * + * The unions with 'dummy_u64' are there to ensure proper alignment for + * 64-bit direct access. + */ +typedef struct { + union { + uint8_t d[512]; /* MUST be 512, exactly */ + uint64_t dummy_u64; + } buf; + size_t ptr; + union { + uint8_t d[256]; + uint64_t dummy_u64; + } state; + int type; +} prng; + +/* + * Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 + * context (in "flipped" state) to obtain its initial state. 
+ */ +void PQCLEAN_FALCON512_AVX2_prng_init(prng *p, inner_shake256_context *src); + +/* + * Refill the PRNG buffer. This is normally invoked automatically, and + * is declared here only so that prng_get_u64() may be inlined. + */ +void PQCLEAN_FALCON512_AVX2_prng_refill(prng *p); + +/* + * Get some bytes from a PRNG. + */ +void PQCLEAN_FALCON512_AVX2_prng_get_bytes(prng *p, void *dst, size_t len); + +/* + * Get a 64-bit random value from a PRNG (eight buffer bytes, assembled in little-endian order). + */ +static inline uint64_t +prng_get_u64(prng *p) { + size_t u; + + /* + * If 9 bytes or fewer remain in the buffer, we refill it. + * This means that we may drop the last few bytes, but this allows + * for faster extraction code. Also, it means that we never leave + * an empty buffer. + */ + u = p->ptr; + if (u >= (sizeof p->buf.d) - 9) { + PQCLEAN_FALCON512_AVX2_prng_refill(p); + u = 0; + } + p->ptr = u + 8; + + return (uint64_t)p->buf.d[u + 0] + | ((uint64_t)p->buf.d[u + 1] << 8) + | ((uint64_t)p->buf.d[u + 2] << 16) + | ((uint64_t)p->buf.d[u + 3] << 24) + | ((uint64_t)p->buf.d[u + 4] << 32) + | ((uint64_t)p->buf.d[u + 5] << 40) + | ((uint64_t)p->buf.d[u + 6] << 48) + | ((uint64_t)p->buf.d[u + 7] << 56); +} + +/* + * Get an 8-bit random value from a PRNG. The buffer is refilled as soon as its last byte has been consumed. + */ +static inline unsigned +prng_get_u8(prng *p) { + unsigned v; + + v = p->buf.d[p->ptr ++]; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON512_AVX2_prng_refill(p); + } + return v; +} + +/* ==================================================================== */ +/* + * FFT (falcon-fft.c). + * + * A real polynomial is represented as an array of N 'fpr' elements. + * The FFT representation of a real polynomial contains N/2 complex + * elements; each is stored as two real numbers, for the real and + * imaginary parts, respectively. See falcon-fft.c for details on the + * internal representation. 
+ */ + +/* + * Compute FFT in-place: the source array should contain a real + * polynomial (N coefficients); its storage area is reused to store + * the FFT representation of that polynomial (N/2 complex numbers). + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON512_AVX2_FFT(fpr *f, unsigned logn); + +/* + * Compute the inverse FFT in-place: the source array should contain the + * FFT representation of a real polynomial (N/2 elements); the resulting + * real polynomial (N coefficients of type 'fpr') is written over the + * array. + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON512_AVX2_iFFT(fpr *f, unsigned logn); + +/* + * Add polynomial b to polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON512_AVX2_poly_add(fpr *a, const fpr *b, unsigned logn); + +/* + * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON512_AVX2_poly_sub(fpr *a, const fpr *b, unsigned logn); + +/* + * Negate polynomial a. This function works in both normal and FFT + * representations. + */ +void PQCLEAN_FALCON512_AVX2_poly_neg(fpr *a, unsigned logn); + +/* + * Compute adjoint of polynomial a. This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON512_AVX2_poly_adj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial a with polynomial b. a and b MUST NOT overlap. + * This function works only in FFT representation. + */ +void PQCLEAN_FALCON512_AVX2_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT + * overlap. This function works only in FFT representation. + */ +void PQCLEAN_FALCON512_AVX2_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial with its own adjoint. This function works only in FFT + * representation. 
+ */ +void PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial with a real constant. This function works in both + * normal and FFT representations. + */ +void PQCLEAN_FALCON512_AVX2_poly_mulconst(fpr *a, fpr x, unsigned logn); + +/* + * Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON512_AVX2_poly_div_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) + * (also in FFT representation). Since the result is auto-adjoint, all its + * coordinates in FFT representation are real; as such, only the first N/2 + * values of d[] are filled (the imaginary parts are skipped). + * + * Array d MUST NOT overlap with either a or b. + */ +void PQCLEAN_FALCON512_AVX2_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn); + +/* + * Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) + * (also in FFT representation). Destination d MUST NOT overlap with + * any of the source arrays. + */ +void PQCLEAN_FALCON512_AVX2_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn); + +/* + * Multiply polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON512_AVX2_poly_mul_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Divide polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. 
+ */ +void PQCLEAN_FALCON512_AVX2_poly_div_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. On input, g00, g01 and g11 are provided (where the + * matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 + * and d11 values are written in g00, g01 and g11, respectively + * (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). + * (In fact, d00 = g00, so the g00 operand is left unmodified.) + */ +void PQCLEAN_FALCON512_AVX2_poly_LDL_fft(const fpr *g00, + fpr *g01, fpr *g11, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. This is identical to poly_LDL_fft() except that + * g00, g01 and g11 are unmodified; the outputs d11 and l10 are written + * in two other separate buffers provided as extra parameters. + */ +void PQCLEAN_FALCON512_AVX2_poly_LDLmv_fft(fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn); + +/* + * Apply "split" operation on a polynomial in FFT representation: + * f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 + * (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. + */ +void PQCLEAN_FALCON512_AVX2_poly_split_fft(fpr *f0, fpr *f1, + const fpr *f, unsigned logn); + +/* + * Apply "merge" operation on two polynomials in FFT representation: + * given f0 and f1, polynomials modulo X^(N/2)+1, this function computes + * f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. + * f MUST NOT overlap with either f0 or f1. + */ +void PQCLEAN_FALCON512_AVX2_poly_merge_fft(fpr *f, + const fpr *f0, const fpr *f1, unsigned logn); + +/* ==================================================================== */ +/* + * Key pair generation. + */ + +/* + * Required sizes of the temporary buffer (in bytes). + * + * This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1 + * or 2) where it is slightly greater. 
+ */ +#define FALCON_KEYGEN_TEMP_1 136 +#define FALCON_KEYGEN_TEMP_2 272 +#define FALCON_KEYGEN_TEMP_3 224 +#define FALCON_KEYGEN_TEMP_4 448 +#define FALCON_KEYGEN_TEMP_5 896 +#define FALCON_KEYGEN_TEMP_6 1792 +#define FALCON_KEYGEN_TEMP_7 3584 +#define FALCON_KEYGEN_TEMP_8 7168 +#define FALCON_KEYGEN_TEMP_9 14336 +#define FALCON_KEYGEN_TEMP_10 28672 + +/* + * Generate a new key pair. Randomness is extracted from the provided + * SHAKE256 context, which must have already been seeded and flipped. + * The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* + * macros) and be aligned for the uint32_t, uint64_t and fpr types. + * + * The private key elements are written in f, g, F and G, and the + * public key is written in h. Either or both of G and h may be NULL, + * in which case the corresponding element is not returned (they can + * be recomputed from f, g and F). + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_AVX2_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Signature generation. + */ + +/* + * Expand a private key into the B0 matrix in FFT representation and + * the LDL tree. All the values are written in 'expanded_key', for + * a total of (8*logn+40)*2^logn bytes. + * + * The tmp[] array must have room for at least 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_AVX2_expand_privkey(fpr *expanded_key, + const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, + unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. 
This function uses an + * expanded key (as generated by PQCLEAN_FALCON512_AVX2_expand_privkey()). + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_AVX2_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. This function uses a raw + * key and dynamically recomputes the B0 matrix and LDL tree; this + * saves RAM since there is no need for an expanded key, but + * increases the signature cost. + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 72*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_AVX2_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Internal sampler engine. Exported for tests. + * + * sampler_context wraps around a source of random numbers (PRNG) and + * the sigma_min value (nominally dependent on the degree). + * + * sampler() takes as parameters: + * ctx pointer to the sampler_context structure + * mu center for the distribution + * isigma inverse of the distribution standard deviation + * It returns an integer sampled along the Gaussian distribution centered + * on mu and of standard deviation sigma = 1/isigma. 
+ * + * gaussian0_sampler() takes as parameter a pointer to a PRNG, and + * returns an integer sampled along a half-Gaussian with standard + * deviation sigma0 = 1.8205 (center is 0, returned value is + * nonnegative). + */ + +typedef struct { + prng p; + fpr sigma_min; +} sampler_context; + +int PQCLEAN_FALCON512_AVX2_sampler(void *ctx, fpr mu, fpr isigma); + +int PQCLEAN_FALCON512_AVX2_gaussian0_sampler(prng *p); + +/* ==================================================================== */ + +#endif diff --git a/crypto_sign/falcon/falcon-512/avx2/keygen.c b/crypto_sign/falcon/falcon-512/avx2/keygen.c new file mode 100644 index 00000000..818755f8 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/keygen.c @@ -0,0 +1,4231 @@ +#include "inner.h" + +/* + * Falcon key pair generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +#define MKN(logn) ((size_t)1 << (logn)) + +/* ==================================================================== */ +/* + * Modular arithmetics. + * + * We implement a few functions for computing modulo a small integer p. + * + * All functions require that 2^30 < p < 2^31. Moreover, operands must + * be in the 0..p-1 range. + * + * Modular addition and subtraction work for all such p. + * + * Montgomery multiplication requires that p is odd, and must be provided + * with an additional value p0i = -1/p mod 2^31. See below for some basics + * on Montgomery multiplication. + * + * Division computes an inverse modulo p by an exponentiation (with + * exponent p-2): this works only if p is prime. Multiplication + * requirements also apply, i.e. p must be odd and p0i must be provided. + * + * The NTT and inverse NTT need all of the above, and also that + * p = 1 mod 2048. + * + * ----------------------------------------------------------------------- + * + * We use Montgomery representation with 31-bit values: + * + * Let R = 2^31 mod p. When 2^30 < p < 2^31, R = 2^31 - p. + * Montgomery representation of an integer x modulo p is x*R mod p. + * + * Montgomery multiplication computes (x*y)/R mod p for + * operands x and y. 
Therefore: + * + * - if operands are x*R and y*R (Montgomery representations of x and + * y), then Montgomery multiplication computes (x*R*y*R)/R = (x*y)*R + * mod p, which is the Montgomery representation of the product x*y; + * + * - if operands are x*R and y (or x and y*R), then Montgomery + * multiplication returns x*y mod p: mixed-representation + * multiplications yield results in normal representation. + * + * To convert to Montgomery representation, we multiply by R, which is done + * by Montgomery-multiplying by R^2. Stand-alone conversion back from + * Montgomery representation is Montgomery-multiplication by 1. + */ + +/* + * Precomputed small primes. Each element contains the following: + * + * p The prime itself. + * + * g A primitive root of phi = X^N+1 (in field Z_p). + * + * s The inverse of the product of all previous primes in the array, + * computed modulo p and in Montgomery representation. + * + * All primes are such that p = 1 mod 2048, and are lower than 2^31. They + * are listed in decreasing order. 
+ */ + +typedef struct { + uint32_t p; + uint32_t g; + uint32_t s; +} small_prime; + +static const small_prime PRIMES[] = { + { 2147473409, 383167813, 10239 }, + { 2147389441, 211808905, 471403745 }, + { 2147387393, 37672282, 1329335065 }, + { 2147377153, 1977035326, 968223422 }, + { 2147358721, 1067163706, 132460015 }, + { 2147352577, 1606082042, 598693809 }, + { 2147346433, 2033915641, 1056257184 }, + { 2147338241, 1653770625, 421286710 }, + { 2147309569, 631200819, 1111201074 }, + { 2147297281, 2038364663, 1042003613 }, + { 2147295233, 1962540515, 19440033 }, + { 2147239937, 2100082663, 353296760 }, + { 2147235841, 1991153006, 1703918027 }, + { 2147217409, 516405114, 1258919613 }, + { 2147205121, 409347988, 1089726929 }, + { 2147196929, 927788991, 1946238668 }, + { 2147178497, 1136922411, 1347028164 }, + { 2147100673, 868626236, 701164723 }, + { 2147082241, 1897279176, 617820870 }, + { 2147074049, 1888819123, 158382189 }, + { 2147051521, 25006327, 522758543 }, + { 2147043329, 327546255, 37227845 }, + { 2147039233, 766324424, 1133356428 }, + { 2146988033, 1862817362, 73861329 }, + { 2146963457, 404622040, 653019435 }, + { 2146959361, 1936581214, 995143093 }, + { 2146938881, 1559770096, 634921513 }, + { 2146908161, 422623708, 1985060172 }, + { 2146885633, 1751189170, 298238186 }, + { 2146871297, 578919515, 291810829 }, + { 2146846721, 1114060353, 915902322 }, + { 2146834433, 2069565474, 47859524 }, + { 2146818049, 1552824584, 646281055 }, + { 2146775041, 1906267847, 1597832891 }, + { 2146756609, 1847414714, 1228090888 }, + { 2146744321, 1818792070, 1176377637 }, + { 2146738177, 1118066398, 1054971214 }, + { 2146736129, 52057278, 933422153 }, + { 2146713601, 592259376, 1406621510 }, + { 2146695169, 263161877, 1514178701 }, + { 2146656257, 685363115, 384505091 }, + { 2146650113, 927727032, 537575289 }, + { 2146646017, 52575506, 1799464037 }, + { 2146643969, 1276803876, 1348954416 }, + { 2146603009, 814028633, 1521547704 }, + { 2146572289, 1846678872, 1310832121 }, + 
{ 2146547713, 919368090, 1019041349 }, + { 2146508801, 671847612, 38582496 }, + { 2146492417, 283911680, 532424562 }, + { 2146490369, 1780044827, 896447978 }, + { 2146459649, 327980850, 1327906900 }, + { 2146447361, 1310561493, 958645253 }, + { 2146441217, 412148926, 287271128 }, + { 2146437121, 293186449, 2009822534 }, + { 2146430977, 179034356, 1359155584 }, + { 2146418689, 1517345488, 1790248672 }, + { 2146406401, 1615820390, 1584833571 }, + { 2146404353, 826651445, 607120498 }, + { 2146379777, 3816988, 1897049071 }, + { 2146363393, 1221409784, 1986921567 }, + { 2146355201, 1388081168, 849968120 }, + { 2146336769, 1803473237, 1655544036 }, + { 2146312193, 1023484977, 273671831 }, + { 2146293761, 1074591448, 467406983 }, + { 2146283521, 831604668, 1523950494 }, + { 2146203649, 712865423, 1170834574 }, + { 2146154497, 1764991362, 1064856763 }, + { 2146142209, 627386213, 1406840151 }, + { 2146127873, 1638674429, 2088393537 }, + { 2146099201, 1516001018, 690673370 }, + { 2146093057, 1294931393, 315136610 }, + { 2146091009, 1942399533, 973539425 }, + { 2146078721, 1843461814, 2132275436 }, + { 2146060289, 1098740778, 360423481 }, + { 2146048001, 1617213232, 1951981294 }, + { 2146041857, 1805783169, 2075683489 }, + { 2146019329, 272027909, 1753219918 }, + { 2145986561, 1206530344, 2034028118 }, + { 2145976321, 1243769360, 1173377644 }, + { 2145964033, 887200839, 1281344586 }, + { 2145906689, 1651026455, 906178216 }, + { 2145875969, 1673238256, 1043521212 }, + { 2145871873, 1226591210, 1399796492 }, + { 2145841153, 1465353397, 1324527802 }, + { 2145832961, 1150638905, 554084759 }, + { 2145816577, 221601706, 427340863 }, + { 2145785857, 608896761, 316590738 }, + { 2145755137, 1712054942, 1684294304 }, + { 2145742849, 1302302867, 724873116 }, + { 2145728513, 516717693, 431671476 }, + { 2145699841, 524575579, 1619722537 }, + { 2145691649, 1925625239, 982974435 }, + { 2145687553, 463795662, 1293154300 }, + { 2145673217, 771716636, 881778029 }, + { 2145630209, 1509556977, 
837364988 }, + { 2145595393, 229091856, 851648427 }, + { 2145587201, 1796903241, 635342424 }, + { 2145525761, 715310882, 1677228081 }, + { 2145495041, 1040930522, 200685896 }, + { 2145466369, 949804237, 1809146322 }, + { 2145445889, 1673903706, 95316881 }, + { 2145390593, 806941852, 1428671135 }, + { 2145372161, 1402525292, 159350694 }, + { 2145361921, 2124760298, 1589134749 }, + { 2145359873, 1217503067, 1561543010 }, + { 2145355777, 338341402, 83865711 }, + { 2145343489, 1381532164, 641430002 }, + { 2145325057, 1883895478, 1528469895 }, + { 2145318913, 1335370424, 65809740 }, + { 2145312769, 2000008042, 1919775760 }, + { 2145300481, 961450962, 1229540578 }, + { 2145282049, 910466767, 1964062701 }, + { 2145232897, 816527501, 450152063 }, + { 2145218561, 1435128058, 1794509700 }, + { 2145187841, 33505311, 1272467582 }, + { 2145181697, 269767433, 1380363849 }, + { 2145175553, 56386299, 1316870546 }, + { 2145079297, 2106880293, 1391797340 }, + { 2145021953, 1347906152, 720510798 }, + { 2145015809, 206769262, 1651459955 }, + { 2145003521, 1885513236, 1393381284 }, + { 2144960513, 1810381315, 31937275 }, + { 2144944129, 1306487838, 2019419520 }, + { 2144935937, 37304730, 1841489054 }, + { 2144894977, 1601434616, 157985831 }, + { 2144888833, 98749330, 2128592228 }, + { 2144880641, 1772327002, 2076128344 }, + { 2144864257, 1404514762, 2029969964 }, + { 2144827393, 801236594, 406627220 }, + { 2144806913, 349217443, 1501080290 }, + { 2144796673, 1542656776, 2084736519 }, + { 2144778241, 1210734884, 1746416203 }, + { 2144759809, 1146598851, 716464489 }, + { 2144757761, 286328400, 1823728177 }, + { 2144729089, 1347555695, 1836644881 }, + { 2144727041, 1795703790, 520296412 }, + { 2144696321, 1302475157, 852964281 }, + { 2144667649, 1075877614, 504992927 }, + { 2144573441, 198765808, 1617144982 }, + { 2144555009, 321528767, 155821259 }, + { 2144550913, 814139516, 1819937644 }, + { 2144536577, 571143206, 962942255 }, + { 2144524289, 1746733766, 2471321 }, + { 2144512001, 
1821415077, 124190939 }, + { 2144468993, 917871546, 1260072806 }, + { 2144458753, 378417981, 1569240563 }, + { 2144421889, 175229668, 1825620763 }, + { 2144409601, 1699216963, 351648117 }, + { 2144370689, 1071885991, 958186029 }, + { 2144348161, 1763151227, 540353574 }, + { 2144335873, 1060214804, 919598847 }, + { 2144329729, 663515846, 1448552668 }, + { 2144327681, 1057776305, 590222840 }, + { 2144309249, 1705149168, 1459294624 }, + { 2144296961, 325823721, 1649016934 }, + { 2144290817, 738775789, 447427206 }, + { 2144243713, 962347618, 893050215 }, + { 2144237569, 1655257077, 900860862 }, + { 2144161793, 242206694, 1567868672 }, + { 2144155649, 769415308, 1247993134 }, + { 2144137217, 320492023, 515841070 }, + { 2144120833, 1639388522, 770877302 }, + { 2144071681, 1761785233, 964296120 }, + { 2144065537, 419817825, 204564472 }, + { 2144028673, 666050597, 2091019760 }, + { 2144010241, 1413657615, 1518702610 }, + { 2143952897, 1238327946, 475672271 }, + { 2143940609, 307063413, 1176750846 }, + { 2143918081, 2062905559, 786785803 }, + { 2143899649, 1338112849, 1562292083 }, + { 2143891457, 68149545, 87166451 }, + { 2143885313, 921750778, 394460854 }, + { 2143854593, 719766593, 133877196 }, + { 2143836161, 1149399850, 1861591875 }, + { 2143762433, 1848739366, 1335934145 }, + { 2143756289, 1326674710, 102999236 }, + { 2143713281, 808061791, 1156900308 }, + { 2143690753, 388399459, 1926468019 }, + { 2143670273, 1427891374, 1756689401 }, + { 2143666177, 1912173949, 986629565 }, + { 2143645697, 2041160111, 371842865 }, + { 2143641601, 1279906897, 2023974350 }, + { 2143635457, 720473174, 1389027526 }, + { 2143621121, 1298309455, 1732632006 }, + { 2143598593, 1548762216, 1825417506 }, + { 2143567873, 620475784, 1073787233 }, + { 2143561729, 1932954575, 949167309 }, + { 2143553537, 354315656, 1652037534 }, + { 2143541249, 577424288, 1097027618 }, + { 2143531009, 357862822, 478640055 }, + { 2143522817, 2017706025, 1550531668 }, + { 2143506433, 2078127419, 1824320165 }, + { 
2143488001, 613475285, 1604011510 }, + { 2143469569, 1466594987, 502095196 }, + { 2143426561, 1115430331, 1044637111 }, + { 2143383553, 9778045, 1902463734 }, + { 2143377409, 1557401276, 2056861771 }, + { 2143363073, 652036455, 1965915971 }, + { 2143260673, 1464581171, 1523257541 }, + { 2143246337, 1876119649, 764541916 }, + { 2143209473, 1614992673, 1920672844 }, + { 2143203329, 981052047, 2049774209 }, + { 2143160321, 1847355533, 728535665 }, + { 2143129601, 965558457, 603052992 }, + { 2143123457, 2140817191, 8348679 }, + { 2143100929, 1547263683, 694209023 }, + { 2143092737, 643459066, 1979934533 }, + { 2143082497, 188603778, 2026175670 }, + { 2143062017, 1657329695, 377451099 }, + { 2143051777, 114967950, 979255473 }, + { 2143025153, 1698431342, 1449196896 }, + { 2143006721, 1862741675, 1739650365 }, + { 2142996481, 756660457, 996160050 }, + { 2142976001, 927864010, 1166847574 }, + { 2142965761, 905070557, 661974566 }, + { 2142916609, 40932754, 1787161127 }, + { 2142892033, 1987985648, 675335382 }, + { 2142885889, 797497211, 1323096997 }, + { 2142871553, 2068025830, 1411877159 }, + { 2142861313, 1217177090, 1438410687 }, + { 2142830593, 409906375, 1767860634 }, + { 2142803969, 1197788993, 359782919 }, + { 2142785537, 643817365, 513932862 }, + { 2142779393, 1717046338, 218943121 }, + { 2142724097, 89336830, 416687049 }, + { 2142707713, 5944581, 1356813523 }, + { 2142658561, 887942135, 2074011722 }, + { 2142638081, 151851972, 1647339939 }, + { 2142564353, 1691505537, 1483107336 }, + { 2142533633, 1989920200, 1135938817 }, + { 2142529537, 959263126, 1531961857 }, + { 2142527489, 453251129, 1725566162 }, + { 2142502913, 1536028102, 182053257 }, + { 2142498817, 570138730, 701443447 }, + { 2142416897, 326965800, 411931819 }, + { 2142363649, 1675665410, 1517191733 }, + { 2142351361, 968529566, 1575712703 }, + { 2142330881, 1384953238, 1769087884 }, + { 2142314497, 1977173242, 1833745524 }, + { 2142289921, 95082313, 1714775493 }, + { 2142283777, 109377615, 1070584533 
}, + { 2142277633, 16960510, 702157145 }, + { 2142263297, 553850819, 431364395 }, + { 2142208001, 241466367, 2053967982 }, + { 2142164993, 1795661326, 1031836848 }, + { 2142097409, 1212530046, 712772031 }, + { 2142087169, 1763869720, 822276067 }, + { 2142078977, 644065713, 1765268066 }, + { 2142074881, 112671944, 643204925 }, + { 2142044161, 1387785471, 1297890174 }, + { 2142025729, 783885537, 1000425730 }, + { 2142011393, 905662232, 1679401033 }, + { 2141974529, 799788433, 468119557 }, + { 2141943809, 1932544124, 449305555 }, + { 2141933569, 1527403256, 841867925 }, + { 2141931521, 1247076451, 743823916 }, + { 2141902849, 1199660531, 401687910 }, + { 2141890561, 150132350, 1720336972 }, + { 2141857793, 1287438162, 663880489 }, + { 2141833217, 618017731, 1819208266 }, + { 2141820929, 999578638, 1403090096 }, + { 2141786113, 81834325, 1523542501 }, + { 2141771777, 120001928, 463556492 }, + { 2141759489, 122455485, 2124928282 }, + { 2141749249, 141986041, 940339153 }, + { 2141685761, 889088734, 477141499 }, + { 2141673473, 324212681, 1122558298 }, + { 2141669377, 1175806187, 1373818177 }, + { 2141655041, 1113654822, 296887082 }, + { 2141587457, 991103258, 1585913875 }, + { 2141583361, 1401451409, 1802457360 }, + { 2141575169, 1571977166, 712760980 }, + { 2141546497, 1107849376, 1250270109 }, + { 2141515777, 196544219, 356001130 }, + { 2141495297, 1733571506, 1060744866 }, + { 2141483009, 321552363, 1168297026 }, + { 2141458433, 505818251, 733225819 }, + { 2141360129, 1026840098, 948342276 }, + { 2141325313, 945133744, 2129965998 }, + { 2141317121, 1871100260, 1843844634 }, + { 2141286401, 1790639498, 1750465696 }, + { 2141267969, 1376858592, 186160720 }, + { 2141255681, 2129698296, 1876677959 }, + { 2141243393, 2138900688, 1340009628 }, + { 2141214721, 1933049835, 1087819477 }, + { 2141212673, 1898664939, 1786328049 }, + { 2141202433, 990234828, 940682169 }, + { 2141175809, 1406392421, 993089586 }, + { 2141165569, 1263518371, 289019479 }, + { 2141073409, 1485624211, 
507864514 }, + { 2141052929, 1885134788, 311252465 }, + { 2141040641, 1285021247, 280941862 }, + { 2141028353, 1527610374, 375035110 }, + { 2141011969, 1400626168, 164696620 }, + { 2140999681, 632959608, 966175067 }, + { 2140997633, 2045628978, 1290889438 }, + { 2140993537, 1412755491, 375366253 }, + { 2140942337, 719477232, 785367828 }, + { 2140925953, 45224252, 836552317 }, + { 2140917761, 1157376588, 1001839569 }, + { 2140887041, 278480752, 2098732796 }, + { 2140837889, 1663139953, 924094810 }, + { 2140788737, 802501511, 2045368990 }, + { 2140766209, 1820083885, 1800295504 }, + { 2140764161, 1169561905, 2106792035 }, + { 2140696577, 127781498, 1885987531 }, + { 2140684289, 16014477, 1098116827 }, + { 2140653569, 665960598, 1796728247 }, + { 2140594177, 1043085491, 377310938 }, + { 2140579841, 1732838211, 1504505945 }, + { 2140569601, 302071939, 358291016 }, + { 2140567553, 192393733, 1909137143 }, + { 2140557313, 406595731, 1175330270 }, + { 2140549121, 1748850918, 525007007 }, + { 2140477441, 499436566, 1031159814 }, + { 2140469249, 1886004401, 1029951320 }, + { 2140426241, 1483168100, 1676273461 }, + { 2140420097, 1779917297, 846024476 }, + { 2140413953, 522948893, 1816354149 }, + { 2140383233, 1931364473, 1296921241 }, + { 2140366849, 1917356555, 147196204 }, + { 2140354561, 16466177, 1349052107 }, + { 2140348417, 1875366972, 1860485634 }, + { 2140323841, 456498717, 1790256483 }, + { 2140321793, 1629493973, 150031888 }, + { 2140315649, 1904063898, 395510935 }, + { 2140280833, 1784104328, 831417909 }, + { 2140250113, 256087139, 697349101 }, + { 2140229633, 388553070, 243875754 }, + { 2140223489, 747459608, 1396270850 }, + { 2140200961, 507423743, 1895572209 }, + { 2140162049, 580106016, 2045297469 }, + { 2140149761, 712426444, 785217995 }, + { 2140137473, 1441607584, 536866543 }, + { 2140119041, 346538902, 1740434653 }, + { 2140090369, 282642885, 21051094 }, + { 2140076033, 1407456228, 319910029 }, + { 2140047361, 1619330500, 1488632070 }, + { 2140041217, 
2089408064, 2012026134 }, + { 2140008449, 1705524800, 1613440760 }, + { 2139924481, 1846208233, 1280649481 }, + { 2139906049, 989438755, 1185646076 }, + { 2139867137, 1522314850, 372783595 }, + { 2139842561, 1681587377, 216848235 }, + { 2139826177, 2066284988, 1784999464 }, + { 2139824129, 480888214, 1513323027 }, + { 2139789313, 847937200, 858192859 }, + { 2139783169, 1642000434, 1583261448 }, + { 2139770881, 940699589, 179702100 }, + { 2139768833, 315623242, 964612676 }, + { 2139666433, 331649203, 764666914 }, + { 2139641857, 2118730799, 1313764644 }, + { 2139635713, 519149027, 519212449 }, + { 2139598849, 1526413634, 1769667104 }, + { 2139574273, 551148610, 820739925 }, + { 2139568129, 1386800242, 472447405 }, + { 2139549697, 813760130, 1412328531 }, + { 2139537409, 1615286260, 1609362979 }, + { 2139475969, 1352559299, 1696720421 }, + { 2139455489, 1048691649, 1584935400 }, + { 2139432961, 836025845, 950121150 }, + { 2139424769, 1558281165, 1635486858 }, + { 2139406337, 1728402143, 1674423301 }, + { 2139396097, 1727715782, 1483470544 }, + { 2139383809, 1092853491, 1741699084 }, + { 2139369473, 690776899, 1242798709 }, + { 2139351041, 1768782380, 2120712049 }, + { 2139334657, 1739968247, 1427249225 }, + { 2139332609, 1547189119, 623011170 }, + { 2139310081, 1346827917, 1605466350 }, + { 2139303937, 369317948, 828392831 }, + { 2139301889, 1560417239, 1788073219 }, + { 2139283457, 1303121623, 595079358 }, + { 2139248641, 1354555286, 573424177 }, + { 2139240449, 60974056, 885781403 }, + { 2139222017, 355573421, 1221054839 }, + { 2139215873, 566477826, 1724006500 }, + { 2139150337, 871437673, 1609133294 }, + { 2139144193, 1478130914, 1137491905 }, + { 2139117569, 1854880922, 964728507 }, + { 2139076609, 202405335, 756508944 }, + { 2139062273, 1399715741, 884826059 }, + { 2139045889, 1051045798, 1202295476 }, + { 2139033601, 1707715206, 632234634 }, + { 2139006977, 2035853139, 231626690 }, + { 2138951681, 183867876, 838350879 }, + { 2138945537, 1403254661, 404460202 
}, + { 2138920961, 310865011, 1282911681 }, + { 2138910721, 1328496553, 103472415 }, + { 2138904577, 78831681, 993513549 }, + { 2138902529, 1319697451, 1055904361 }, + { 2138816513, 384338872, 1706202469 }, + { 2138810369, 1084868275, 405677177 }, + { 2138787841, 401181788, 1964773901 }, + { 2138775553, 1850532988, 1247087473 }, + { 2138767361, 874261901, 1576073565 }, + { 2138757121, 1187474742, 993541415 }, + { 2138748929, 1782458888, 1043206483 }, + { 2138744833, 1221500487, 800141243 }, + { 2138738689, 413465368, 1450660558 }, + { 2138695681, 739045140, 342611472 }, + { 2138658817, 1355845756, 672674190 }, + { 2138644481, 608379162, 1538874380 }, + { 2138632193, 1444914034, 686911254 }, + { 2138607617, 484707818, 1435142134 }, + { 2138591233, 539460669, 1290458549 }, + { 2138572801, 2093538990, 2011138646 }, + { 2138552321, 1149786988, 1076414907 }, + { 2138546177, 840688206, 2108985273 }, + { 2138533889, 209669619, 198172413 }, + { 2138523649, 1975879426, 1277003968 }, + { 2138490881, 1351891144, 1976858109 }, + { 2138460161, 1817321013, 1979278293 }, + { 2138429441, 1950077177, 203441928 }, + { 2138400769, 908970113, 628395069 }, + { 2138398721, 219890864, 758486760 }, + { 2138376193, 1306654379, 977554090 }, + { 2138351617, 298822498, 2004708503 }, + { 2138337281, 441457816, 1049002108 }, + { 2138320897, 1517731724, 1442269609 }, + { 2138290177, 1355911197, 1647139103 }, + { 2138234881, 531313247, 1746591962 }, + { 2138214401, 1899410930, 781416444 }, + { 2138202113, 1813477173, 1622508515 }, + { 2138191873, 1086458299, 1025408615 }, + { 2138183681, 1998800427, 827063290 }, + { 2138173441, 1921308898, 749670117 }, + { 2138103809, 1620902804, 2126787647 }, + { 2138099713, 828647069, 1892961817 }, + { 2138085377, 179405355, 1525506535 }, + { 2138060801, 615683235, 1259580138 }, + { 2138044417, 2030277840, 1731266562 }, + { 2138042369, 2087222316, 1627902259 }, + { 2138032129, 126388712, 1108640984 }, + { 2138011649, 715026550, 1017980050 }, + { 2137993217, 
1693714349, 1351778704 }, + { 2137888769, 1289762259, 1053090405 }, + { 2137853953, 199991890, 1254192789 }, + { 2137833473, 941421685, 896995556 }, + { 2137817089, 750416446, 1251031181 }, + { 2137792513, 798075119, 368077456 }, + { 2137786369, 878543495, 1035375025 }, + { 2137767937, 9351178, 1156563902 }, + { 2137755649, 1382297614, 1686559583 }, + { 2137724929, 1345472850, 1681096331 }, + { 2137704449, 834666929, 630551727 }, + { 2137673729, 1646165729, 1892091571 }, + { 2137620481, 778943821, 48456461 }, + { 2137618433, 1730837875, 1713336725 }, + { 2137581569, 805610339, 1378891359 }, + { 2137538561, 204342388, 1950165220 }, + { 2137526273, 1947629754, 1500789441 }, + { 2137516033, 719902645, 1499525372 }, + { 2137491457, 230451261, 556382829 }, + { 2137440257, 979573541, 412760291 }, + { 2137374721, 927841248, 1954137185 }, + { 2137362433, 1243778559, 861024672 }, + { 2137313281, 1341338501, 980638386 }, + { 2137311233, 937415182, 1793212117 }, + { 2137255937, 795331324, 1410253405 }, + { 2137243649, 150756339, 1966999887 }, + { 2137182209, 163346914, 1939301431 }, + { 2137171969, 1952552395, 758913141 }, + { 2137159681, 570788721, 218668666 }, + { 2137147393, 1896656810, 2045670345 }, + { 2137141249, 358493842, 518199643 }, + { 2137139201, 1505023029, 674695848 }, + { 2137133057, 27911103, 830956306 }, + { 2137122817, 439771337, 1555268614 }, + { 2137116673, 790988579, 1871449599 }, + { 2137110529, 432109234, 811805080 }, + { 2137102337, 1357900653, 1184997641 }, + { 2137098241, 515119035, 1715693095 }, + { 2137090049, 408575203, 2085660657 }, + { 2137085953, 2097793407, 1349626963 }, + { 2137055233, 1556739954, 1449960883 }, + { 2137030657, 1545758650, 1369303716 }, + { 2136987649, 332602570, 103875114 }, + { 2136969217, 1499989506, 1662964115 }, + { 2136924161, 857040753, 4738842 }, + { 2136895489, 1948872712, 570436091 }, + { 2136893441, 58969960, 1568349634 }, + { 2136887297, 2127193379, 273612548 }, + { 2136850433, 111208983, 1181257116 }, + { 
2136809473, 1627275942, 1680317971 }, + { 2136764417, 1574888217, 14011331 }, + { 2136741889, 14011055, 1129154251 }, + { 2136727553, 35862563, 1838555253 }, + { 2136721409, 310235666, 1363928244 }, + { 2136698881, 1612429202, 1560383828 }, + { 2136649729, 1138540131, 800014364 }, + { 2136606721, 602323503, 1433096652 }, + { 2136563713, 182209265, 1919611038 }, + { 2136555521, 324156477, 165591039 }, + { 2136549377, 195513113, 217165345 }, + { 2136526849, 1050768046, 939647887 }, + { 2136508417, 1886286237, 1619926572 }, + { 2136477697, 609647664, 35065157 }, + { 2136471553, 679352216, 1452259468 }, + { 2136457217, 128630031, 824816521 }, + { 2136422401, 19787464, 1526049830 }, + { 2136420353, 698316836, 1530623527 }, + { 2136371201, 1651862373, 1804812805 }, + { 2136334337, 326596005, 336977082 }, + { 2136322049, 63253370, 1904972151 }, + { 2136297473, 312176076, 172182411 }, + { 2136248321, 381261841, 369032670 }, + { 2136242177, 358688773, 1640007994 }, + { 2136229889, 512677188, 75585225 }, + { 2136219649, 2095003250, 1970086149 }, + { 2136207361, 1909650722, 537760675 }, + { 2136176641, 1334616195, 1533487619 }, + { 2136158209, 2096285632, 1793285210 }, + { 2136143873, 1897347517, 293843959 }, + { 2136133633, 923586222, 1022655978 }, + { 2136096769, 1464868191, 1515074410 }, + { 2136094721, 2020679520, 2061636104 }, + { 2136076289, 290798503, 1814726809 }, + { 2136041473, 156415894, 1250757633 }, + { 2135996417, 297459940, 1132158924 }, + { 2135955457, 538755304, 1688831340 }, + { 0, 0, 0 } +}; + +/* + * Reduce a small signed integer modulo a small prime. The source + * value x MUST be such that -p < x < p. The returned value is in the 0..p-1 range (p is added when x is negative, without any branch). + */ +static inline uint32_t +modp_set(int32_t x, uint32_t p) { + uint32_t w; + + w = (uint32_t)x; + w += p & -(w >> 31); + return w; +} + +/* + * Normalize a modular integer around 0: x in 0..p-1 (p odd) is mapped to the congruent value in -(p-1)/2..+(p-1)/2. + */ +static inline int32_t +modp_norm(uint32_t x, uint32_t p) { + return (int32_t)(x - (p & (((x - ((p + 1) >> 1)) >> 31) - 1))); +} + +/* + * Compute -1/p mod 2^31.
This works for all odd integers p that fit + * on 31 bits. + */ +static uint32_t +modp_ninv31(uint32_t p) { + uint32_t y; + + y = 2 - p; + y *= 2 - p * y; + y *= 2 - p * y; + y *= 2 - p * y; + y *= 2 - p * y; + return (uint32_t)0x7FFFFFFF & -y; +} + +/* + * Compute R = 2^31 mod p. + */ +static inline uint32_t +modp_R(uint32_t p) { + /* + * Since 2^30 < p < 2^31, we know that 2^31 mod p is simply + * 2^31 - p. + */ + return ((uint32_t)1 << 31) - p; +} + +/* + * Addition modulo p. Both operands must be in the 0..p-1 range; so is the result. + */ +static inline uint32_t +modp_add(uint32_t a, uint32_t b, uint32_t p) { + uint32_t d; + + d = a + b - p; + d += p & -(d >> 31); + return d; +} + +/* + * Subtraction modulo p (a - b). Both operands must be in the 0..p-1 range; so is the result. + */ +static inline uint32_t +modp_sub(uint32_t a, uint32_t b, uint32_t p) { + uint32_t d; + + d = a - b; + d += p & -(d >> 31); + return d; +} + +/* + * Halving modulo p. + */ +/* unused +static inline uint32_t +modp_half(uint32_t a, uint32_t p) +{ + a += p & -(a & 1); + return a >> 1; +} +*/ + +/* + * Montgomery multiplication modulo p. The 'p0i' value is -1/p mod 2^31. + * It is required that p is an odd integer. For operands in the 0..p-1 range, the result is (a*b)/2^31 mod p, also in the 0..p-1 range. + */ +static inline uint32_t +modp_montymul(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i) { + uint64_t z, w; + uint32_t d; + + z = (uint64_t)a * (uint64_t)b; + w = ((z * p0i) & (uint64_t)0x7FFFFFFF) * p; + d = (uint32_t)((z + w) >> 31) - p; + d += p & -(d >> 31); + return d; +} + +/* + * Compute R2 = 2^62 mod p. + */ +static uint32_t +modp_R2(uint32_t p, uint32_t p0i) { + uint32_t z; + + /* + * Compute z = 2^31 mod p (this is the value 1 in Montgomery + * representation), then double it with an addition. + */ + z = modp_R(p); + z = modp_add(z, z, p); + + /* + * Square it five times to obtain 2^32 in Montgomery representation + * (i.e. 2^63 mod p). + */ + z = modp_montymul(z, z, p, p0i); + z = modp_montymul(z, z, p, p0i); + z = modp_montymul(z, z, p, p0i); + z = modp_montymul(z, z, p, p0i); + z = modp_montymul(z, z, p, p0i); + + /* + * Halve the value mod p to get 2^62.
+ */ + z = (z + (p & -(z & 1))) >> 1; + return z; +} + +/* + * Compute 2^(31*x) modulo p. This works for integers x from 1 up to 2^11 (x is decremented in unsigned arithmetic below, so x = 0 would wrap around and is not supported). + * p must be prime such that 2^30 < p < 2^31; p0i must be equal to + * -1/p mod 2^31; R2 must be equal to 2^62 mod p. + */ +static inline uint32_t +modp_Rx(unsigned x, uint32_t p, uint32_t p0i, uint32_t R2) { + int i; + uint32_t r, z; + + /* + * 2^(31*x) = (2^31)*(2^(31*(x-1))); i.e. we want the Montgomery + * representation of (2^31)^e mod p, where e = x-1. + * R2 is 2^31 in Montgomery representation. + */ + x --; + r = R2; + z = modp_R(p); + for (i = 0; (1U << i) <= x; i ++) { + if ((x & (1U << i)) != 0) { + z = modp_montymul(z, r, p, p0i); + } + r = modp_montymul(r, r, p, p0i); + } + return z; +} + +/* + * Division modulo p. If the divisor (b) is 0, then 0 is returned. + * This function computes proper results only when p is prime. + * Parameters: + * a dividend + * b divisor + * p odd prime modulus + * p0i -1/p mod 2^31 + * R 2^31 mod p + */ +static uint32_t +modp_div(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i, uint32_t R) { + uint32_t z, e; + int i; + + e = p - 2; + z = R; + for (i = 30; i >= 0; i --) { + uint32_t z2; + + z = modp_montymul(z, z, p, p0i); + z2 = modp_montymul(z, b, p, p0i); + z ^= (z ^ z2) & -(uint32_t)((e >> i) & 1); + } + + /* + * The loop above just assumed that b was in Montgomery + * representation, i.e. really contained b*R; under that + * assumption, it returns 1/b in Montgomery representation, + * which is R/b. But we gave it b in normal representation, + * so the loop really returned R/(b/R) = R^2/b. + * + * We want a/b, so we need one Montgomery multiplication with a, + * which also removes one of the R factors, and another such + * multiplication to remove the second R factor. + */ + z = modp_montymul(z, 1, p, p0i); + return modp_montymul(a, z, p, p0i); +} + +/* + * Bit-reversal index table.
+ */ +static const uint16_t REV10[] = { + 0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, + 192, 704, 448, 960, 32, 544, 288, 800, 160, 672, 416, 928, + 96, 608, 352, 864, 224, 736, 480, 992, 16, 528, 272, 784, + 144, 656, 400, 912, 80, 592, 336, 848, 208, 720, 464, 976, + 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880, + 240, 752, 496, 1008, 8, 520, 264, 776, 136, 648, 392, 904, + 72, 584, 328, 840, 200, 712, 456, 968, 40, 552, 296, 808, + 168, 680, 424, 936, 104, 616, 360, 872, 232, 744, 488, 1000, + 24, 536, 280, 792, 152, 664, 408, 920, 88, 600, 344, 856, + 216, 728, 472, 984, 56, 568, 312, 824, 184, 696, 440, 952, + 120, 632, 376, 888, 248, 760, 504, 1016, 4, 516, 260, 772, + 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, + 36, 548, 292, 804, 164, 676, 420, 932, 100, 612, 356, 868, + 228, 740, 484, 996, 20, 532, 276, 788, 148, 660, 404, 916, + 84, 596, 340, 852, 212, 724, 468, 980, 52, 564, 308, 820, + 180, 692, 436, 948, 116, 628, 372, 884, 244, 756, 500, 1012, + 12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332, 844, + 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, + 108, 620, 364, 876, 236, 748, 492, 1004, 28, 540, 284, 796, + 156, 668, 412, 924, 92, 604, 348, 860, 220, 732, 476, 988, + 60, 572, 316, 828, 188, 700, 444, 956, 124, 636, 380, 892, + 252, 764, 508, 1020, 2, 514, 258, 770, 130, 642, 386, 898, + 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290, 802, + 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994, + 18, 530, 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, + 210, 722, 466, 978, 50, 562, 306, 818, 178, 690, 434, 946, + 114, 626, 370, 882, 242, 754, 498, 1010, 10, 522, 266, 778, + 138, 650, 394, 906, 74, 586, 330, 842, 202, 714, 458, 970, + 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362, 874, + 234, 746, 490, 1002, 26, 538, 282, 794, 154, 666, 410, 922, + 90, 602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, + 186, 698, 442, 954, 122, 634, 378, 890, 250, 762, 506, 1018, + 6, 
518, 262, 774, 134, 646, 390, 902, 70, 582, 326, 838, + 198, 710, 454, 966, 38, 550, 294, 806, 166, 678, 422, 934, + 102, 614, 358, 870, 230, 742, 486, 998, 22, 534, 278, 790, + 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, + 54, 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, + 246, 758, 502, 1014, 14, 526, 270, 782, 142, 654, 398, 910, + 78, 590, 334, 846, 206, 718, 462, 974, 46, 558, 302, 814, + 174, 686, 430, 942, 110, 622, 366, 878, 238, 750, 494, 1006, + 30, 542, 286, 798, 158, 670, 414, 926, 94, 606, 350, 862, + 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446, 958, + 126, 638, 382, 894, 254, 766, 510, 1022, 1, 513, 257, 769, + 129, 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, + 33, 545, 289, 801, 161, 673, 417, 929, 97, 609, 353, 865, + 225, 737, 481, 993, 17, 529, 273, 785, 145, 657, 401, 913, + 81, 593, 337, 849, 209, 721, 465, 977, 49, 561, 305, 817, + 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009, + 9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, + 201, 713, 457, 969, 41, 553, 297, 809, 169, 681, 425, 937, + 105, 617, 361, 873, 233, 745, 489, 1001, 25, 537, 281, 793, + 153, 665, 409, 921, 89, 601, 345, 857, 217, 729, 473, 985, + 57, 569, 313, 825, 185, 697, 441, 953, 121, 633, 377, 889, + 249, 761, 505, 1017, 5, 517, 261, 773, 133, 645, 389, 901, + 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, + 165, 677, 421, 933, 101, 613, 357, 869, 229, 741, 485, 997, + 21, 533, 277, 789, 149, 661, 405, 917, 85, 597, 341, 853, + 213, 725, 469, 981, 53, 565, 309, 821, 181, 693, 437, 949, + 117, 629, 373, 885, 245, 757, 501, 1013, 13, 525, 269, 781, + 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461, 973, + 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, + 237, 749, 493, 1005, 29, 541, 285, 797, 157, 669, 413, 925, + 93, 605, 349, 861, 221, 733, 477, 989, 61, 573, 317, 829, + 189, 701, 445, 957, 125, 637, 381, 893, 253, 765, 509, 1021, + 3, 515, 259, 771, 131, 643, 387, 899, 67, 579, 
323, 835, + 195, 707, 451, 963, 35, 547, 291, 803, 163, 675, 419, 931, + 99, 611, 355, 867, 227, 739, 483, 995, 19, 531, 275, 787, + 147, 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, + 51, 563, 307, 819, 179, 691, 435, 947, 115, 627, 371, 883, + 243, 755, 499, 1011, 11, 523, 267, 779, 139, 651, 395, 907, + 75, 587, 331, 843, 203, 715, 459, 971, 43, 555, 299, 811, + 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491, 1003, + 27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, + 219, 731, 475, 987, 59, 571, 315, 827, 187, 699, 443, 955, + 123, 635, 379, 891, 251, 763, 507, 1019, 7, 519, 263, 775, + 135, 647, 391, 903, 71, 583, 327, 839, 199, 711, 455, 967, + 39, 551, 295, 807, 167, 679, 423, 935, 103, 615, 359, 871, + 231, 743, 487, 999, 23, 535, 279, 791, 151, 663, 407, 919, + 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, + 183, 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015, + 15, 527, 271, 783, 143, 655, 399, 911, 79, 591, 335, 847, + 207, 719, 463, 975, 47, 559, 303, 815, 175, 687, 431, 943, + 111, 623, 367, 879, 239, 751, 495, 1007, 31, 543, 287, 799, + 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479, 991, + 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, + 255, 767, 511, 1023 +}; + +/* + * Compute the roots for NTT and inverse NTT (binary case). Input + * parameter g is a primitive 2048-th root of 1 modulo p (i.e. g^1024 = + * -1 mod p). This fills gm[] and igm[] with powers of g and 1/g: + * gm[rev(i)] = g^i mod p + * igm[rev(i)] = (1/g)^i mod p + * where rev() is the "bit reversal" function over 10 bits. It fills + * the arrays only up to N = 2^logn values. + * + * The values stored in gm[] and igm[] are in Montgomery representation. + * + * p must be a prime such that p = 1 mod 2048. 
+ */ +static void +modp_mkgm2(uint32_t *gm, uint32_t *igm, unsigned logn, + uint32_t g, uint32_t p, uint32_t p0i) { + size_t u, n; + unsigned k; + uint32_t ig, x1, x2, R2; + + n = (size_t)1 << logn; + + /* + * We want g such that g^(2N) = 1 mod p, but the provided + * generator has order 2048. We must square it a few times. + */ + R2 = modp_R2(p, p0i); + g = modp_montymul(g, R2, p, p0i); + for (k = logn; k < 10; k ++) { + g = modp_montymul(g, g, p, p0i); + } + + ig = modp_div(R2, g, p, p0i, modp_R(p)); + k = 10 - logn; + x1 = x2 = modp_R(p); + for (u = 0; u < n; u ++) { + size_t v; + + v = REV10[u << k]; + gm[v] = x1; + igm[v] = x2; + x1 = modp_montymul(x1, g, p, p0i); + x2 = modp_montymul(x2, ig, p, p0i); + } +} + +/* + * Compute the NTT over a polynomial (binary case). Polynomial elements + * are a[0], a[stride], a[2 * stride]... + */ +static void +modp_NTT2_ext(uint32_t *a, size_t stride, const uint32_t *gm, unsigned logn, + uint32_t p, uint32_t p0i) { + size_t t, m, n; + + if (logn == 0) { + return; + } + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, u, v1; + + ht = t >> 1; + for (u = 0, v1 = 0; u < m; u ++, v1 += t) { + uint32_t s; + size_t v; + uint32_t *r1, *r2; + + s = gm[m + u]; + r1 = a + v1 * stride; + r2 = r1 + ht * stride; + for (v = 0; v < ht; v ++, r1 += stride, r2 += stride) { + uint32_t x, y; + + x = *r1; + y = modp_montymul(*r2, s, p, p0i); + *r1 = modp_add(x, y, p); + *r2 = modp_sub(x, y, p); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT over a polynomial (binary case). 
/*
 * modp_iNTT2_ext(): inverse NTT (binary case) over the elements
 * a[0], a[stride], a[2 * stride]..., using the table igm[].
 */
static void
modp_iNTT2_ext(uint32_t *a, size_t stride, const uint32_t *igm, unsigned logn,
    uint32_t p, uint32_t p0i) {
    size_t n, span, groups, i;
    uint32_t inv_n;
    uint32_t *cur;

    if (logn == 0) {
        return;
    }
    n = (size_t)1 << logn;

    /*
     * 'groups' is halved and 'span' (distance between the two
     * butterfly inputs) is doubled at each layer.
     */
    span = 1;
    for (groups = n; groups > 1; groups >>= 1) {
        size_t half_groups = groups >> 1;
        size_t dbl = span << 1;
        size_t gi, base;

        for (gi = 0, base = 0; gi < half_groups; gi ++, base += dbl) {
            uint32_t tw = igm[half_groups + gi];
            uint32_t *lo = a + base * stride;
            uint32_t *hi = lo + span * stride;
            size_t j;

            for (j = 0; j < span; j ++) {
                uint32_t x = *lo;
                uint32_t y = *hi;

                *lo = modp_add(x, y, p);
                *hi = modp_montymul(modp_sub(x, y, p), tw, p, p0i);
                lo += stride;
                hi += stride;
            }
        }
        span = dbl;
    }

    /*
     * We need 1/n in Montgomery representation, i.e. R/n. Since
     * 1 <= logn <= 10, R/n is an integer; moreover, R/n <= 2^30 < p,
     * thus a simple shift will do.
     */
    inv_n = (uint32_t)1 << (31 - logn);
    for (i = 0, cur = a; i < n; i ++, cur += stride) {
        *cur = modp_montymul(*cur, inv_n, p, p0i);
    }
}

/*
 * Simplified macros for NTT and iNTT (binary case) when the elements
 * are consecutive in RAM.
 */
#define modp_NTT2(a, gm, logn, p, p0i)   modp_NTT2_ext(a, 1, gm, logn, p, p0i)
#define modp_iNTT2(a, igm, logn, p, p0i) modp_iNTT2_ext(a, 1, igm, logn, p, p0i)

/*
 * Given polynomial f in NTT representation modulo p, compute f' of degree
 * less than N/2 such that f' = f0^2 - X*f1^2, where f0 and f1 are
 * polynomials of degree less than N/2 such that f = f0(X^2) + X*f1(X^2).
 *
 * The new polynomial is written "in place" over the first N/2 elements
 * of f.
 *
 * If applied logn times successively on a given polynomial, the resulting
 * degree-0 polynomial is the resultant of f and X^N+1 modulo p.
 *
 * This function applies only to the binary case; it is invoked from
 * solve_NTRU_binary_depth1().
 */
/*
 * modp_poly_rec_res(): replace f[i], for i in 0..N/2-1, with the
 * product f[2*i] * f[2*i+1]; the extra multiplication by R2
 * compensates for the second Montgomery multiplication. logn must be
 * at least 1.
 */
static void
modp_poly_rec_res(uint32_t *f, unsigned logn,
    uint32_t p, uint32_t p0i, uint32_t R2) {
    const size_t half = (size_t)1 << (logn - 1);
    size_t i;

    for (i = 0; i < half; i ++) {
        uint32_t even = f[2 * i];
        uint32_t odd = f[2 * i + 1];
        uint32_t prod = modp_montymul(even, odd, p, p0i);

        f[i] = modp_montymul(prod, R2, p, p0i);
    }
}

/* ==================================================================== */
/*
 * Custom bignum implementation.
 *
 * This is a very reduced set of functionalities. We need to do the
 * following operations:
 *
 * - Rebuild the resultant and the polynomial coefficients from their
 *   values modulo small primes (of length 31 bits each).
 *
 * - Compute an extended GCD between the two computed resultants.
 *
 * - Extract top bits and add scaled values during the successive steps
 *   of Babai rounding.
 *
 * When rebuilding values using CRT, we must also recompute the product
 * of the small prime factors. We always do it one small factor at a
 * time, so the "complicated" operations can be done modulo the small
 * prime with the modp_* functions. CRT coefficients (inverses) are
 * precomputed.
 *
 * All values are positive until the last step: when the polynomial
 * coefficients have been rebuilt, we normalize them around 0. But then,
 * only additions and subtractions on the upper few bits are needed
 * afterwards.
 *
 * We keep big integers as arrays of 31-bit words (in uint32_t values);
 * the top bit of each uint32_t is kept equal to 0. Using 31-bit words
 * makes it easier to keep track of carries. When negative values are
 * used, two's complement is used.
 */

/*
 * Subtract integer b from integer a. Both integers are supposed to have
 * the same size. The carry (0 or 1) is returned. Source arrays a and b
 * MUST be distinct.
 *
 * The operation is performed as described above if ctl = 1.
 */
/*
 * zint_sub(): if ctl = 0, the value a[] is unmodified, but all memory
 * accesses are still performed, and the carry is computed and returned.
 */
static uint32_t
zint_sub(uint32_t *a, const uint32_t *b, size_t len,
    uint32_t ctl) {
    const uint32_t mask = -ctl;
    uint32_t borrow = 0;
    size_t i;

    for (i = 0; i < len; i ++) {
        uint32_t cur = a[i];
        uint32_t diff = cur - b[i] - borrow;

        borrow = diff >> 31;
        /* Keep 'cur' when mask = 0, take the 31-bit difference otherwise. */
        a[i] = cur ^ (((diff & 0x7FFFFFFF) ^ cur) & mask);
    }
    return borrow;
}

/*
 * Multiply the provided big integer m with a small value x.
 * This function assumes that x < 2^31. The carry word is returned.
 */
static uint32_t
zint_mul_small(uint32_t *m, size_t mlen, uint32_t x) {
    uint32_t carry = 0;
    size_t i;

    for (i = 0; i < mlen; i ++) {
        uint64_t prod = (uint64_t)m[i] * (uint64_t)x + carry;

        m[i] = (uint32_t)prod & 0x7FFFFFFF;
        carry = (uint32_t)(prod >> 31);
    }
    return carry;
}

/*
 * Reduce a big integer d modulo a small integer p.
 * Rules:
 *  d is unsigned
 *  p is prime
 *  2^30 < p < 2^31
 *  p0i = -(1/p) mod 2^31
 *  R2 = 2^62 mod p
 */
static uint32_t
zint_mod_small_unsigned(const uint32_t *d, size_t dlen,
    uint32_t p, uint32_t p0i, uint32_t R2) {
    uint32_t acc = 0;
    size_t i;

    /*
     * Words are injected one by one, starting with the high word.
     * Each step multiplies the accumulator by 2^31 (Montgomery
     * multiplication by R2), then adds the next word reduced
     * modulo p.
     */
    for (i = dlen; i > 0; i --) {
        uint32_t w;

        acc = modp_montymul(acc, R2, p, p0i);
        w = d[i - 1] - p;
        w += p & -(w >> 31);
        acc = modp_add(acc, w, p);
    }
    return acc;
}

/*
 * Similar to zint_mod_small_unsigned(), except that d may be signed.
 * Extra parameter is Rx = 2^(31*dlen) mod p.
 */
static uint32_t
zint_mod_small_signed(const uint32_t *d, size_t dlen,
    uint32_t p, uint32_t p0i, uint32_t R2, uint32_t Rx) {
    uint32_t r;

    if (dlen == 0) {
        return 0;
    }
    r = zint_mod_small_unsigned(d, dlen, p, p0i, R2);

    /* Subtract Rx when the sign bit (bit 30 of the top word) is set. */
    return modp_sub(r, Rx & -(d[dlen - 1] >> 30), p);
}

/*
 * Add y*s to x.
 */
/*
 * zint_add_mul_small(): x and y initially have length 'len' words; the
 * new x has length 'len+1' words. 's' must fit on 31 bits. x[] and y[]
 * must not overlap.
 */
static void
zint_add_mul_small(uint32_t *x,
    const uint32_t *y, size_t len, uint32_t s) {
    uint32_t carry = 0;
    size_t i;

    for (i = 0; i < len; i ++) {
        uint64_t acc = (uint64_t)y[i] * (uint64_t)s
                       + (uint64_t)x[i] + (uint64_t)carry;

        x[i] = (uint32_t)acc & 0x7FFFFFFF;
        carry = (uint32_t)(acc >> 31);
    }
    x[len] = carry;
}

/*
 * Normalize a modular integer around 0: if x > p/2, then x is replaced
 * with x - p (signed encoding with two's complement); otherwise, x is
 * untouched. The two integers x and p are encoded over the same length.
 */
static void
zint_norm_zero(uint32_t *x, const uint32_t *p, size_t len) {
    uint32_t cmp = 0;
    uint32_t shifted_in = 0;
    size_t i;

    /*
     * Compare x with p/2. We use the shifted version of p, and p
     * is odd, so we really compare with (p-1)/2; we want to perform
     * the subtraction if and only if x > (p-1)/2. Words are scanned
     * from the most significant one downwards.
     */
    for (i = len; i > 0; i --) {
        uint32_t pw = p[i - 1];
        uint32_t wx = x[i - 1];
        /* Word of p >> 1: low bit of the word above comes in at bit 30. */
        uint32_t wp = (pw >> 1) | (shifted_in << 30);
        uint32_t d;

        shifted_in = pw & 1;

        /*
         * d is set to -1, 0 or 1, depending on whether wp is
         * lower than, equal to, or greater than wx.
         */
        d = wp - wx;
        d = ((-d) >> 31) | -(d >> 31);

        /*
         * Once cmp is non-zero (1 or -1) it keeps its value;
         * while it is still 0, it is replaced with d.
         */
        cmp |= d & ((cmp & 1) - 1);
    }

    /*
     * cmp is now -1, 0 or 1, depending on whether (p-1)/2 is lower
     * than, equal to, or greater than x. The subtraction is done
     * only if cmp = -1.
     */
    zint_sub(x, p, len, cmp >> 31);
}

/*
 * Rebuild integers from their RNS representation. There are 'num'
 * integers, and each consists in 'xlen' words.
 */
'xx' points at that + * first word of the first integer; subsequent integers are accessed + * by adding 'xstride' repeatedly. + * + * The words of an integer are the RNS representation of that integer, + * using the provided 'primes' are moduli. This function replaces + * each integer with its multi-word value (little-endian order). + * + * If "normalize_signed" is non-zero, then the returned value is + * normalized to the -m/2..m/2 interval (where m is the product of all + * small prime moduli); two's complement is used for negative values. + */ +static void +zint_rebuild_CRT(uint32_t *xx, size_t xlen, size_t xstride, + size_t num, const small_prime *primes, int normalize_signed, + uint32_t *tmp) { + size_t u; + uint32_t *x; + + tmp[0] = primes[0].p; + for (u = 1; u < xlen; u ++) { + /* + * At the entry of each loop iteration: + * - the first u words of each array have been + * reassembled; + * - the first u words of tmp[] contains the + * product of the prime moduli processed so far. + * + * We call 'q' the product of all previous primes. + */ + uint32_t p, p0i, s, R2; + size_t v; + + p = primes[u].p; + s = primes[u].s; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + for (v = 0, x = xx; v < num; v ++, x += xstride) { + uint32_t xp, xq, xr; + /* + * xp = the integer x modulo the prime p for this + * iteration + * xq = (x mod q) mod p + */ + xp = x[u]; + xq = zint_mod_small_unsigned(x, u, p, p0i, R2); + + /* + * New value is (x mod q) + q * (s * (xp - xq) mod p) + */ + xr = modp_montymul(s, modp_sub(xp, xq, p), p, p0i); + zint_add_mul_small(x, tmp, u, xr); + } + + /* + * Update product of primes in tmp[]. + */ + tmp[u] = zint_mul_small(tmp, u, p); + } + + /* + * Normalize the reconstructed values around 0. + */ + if (normalize_signed) { + for (u = 0, x = xx; u < num; u ++, x += xstride) { + zint_norm_zero(x, tmp, xlen); + } + } +} + +/* + * Negate a big integer conditionally: value a is replaced with -a if + * and only if ctl = 1. 
/*
 * zint_negate(): control value ctl must be 0 or 1.
 */
static void
zint_negate(uint32_t *a, size_t len, uint32_t ctl) {
    /*
     * If ctl = 1 then we flip the bits of a by XORing with
     * 0x7FFFFFFF, and we add 1 to the value. If ctl = 0 then we XOR
     * with 0 and add 0, which leaves the value unchanged.
     */
    const uint32_t flip = -ctl >> 1;
    uint32_t carry = ctl;
    size_t i;

    for (i = 0; i < len; i ++) {
        uint32_t w = (a[i] ^ flip) + carry;

        a[i] = w & 0x7FFFFFFF;
        carry = w >> 31;
    }
}

/*
 * Replace a with (a*xa+b*xb)/(2^31) and b with (a*ya+b*yb)/(2^31).
 * The low bits are dropped (the caller should compute the coefficients
 * such that these dropped bits are all zeros). If either or both
 * yields a negative value, then the value is negated.
 *
 * Returned value is:
 *  0  both values were positive
 *  1  new a had to be negated
 *  2  new b had to be negated
 *  3  both new a and new b had to be negated
 *
 * Coefficients xa, xb, ya and yb may use the full signed 32-bit range.
 *
 * If len = 0 there is nothing to reduce: the arrays are not accessed
 * and 0 is returned.
 */
static uint32_t
zint_co_reduce(uint32_t *a, uint32_t *b, size_t len,
    int64_t xa, int64_t xb, int64_t ya, int64_t yb) {
    size_t u;
    int64_t cca, ccb;
    uint32_t nega, negb;

    /*
     * The top words a[len - 1] and b[len - 1] are written
     * unconditionally below; an empty operand must therefore be
     * rejected here to avoid an out-of-bounds access.
     */
    if (len == 0) {
        return 0;
    }

    cca = 0;
    ccb = 0;
    for (u = 0; u < len; u ++) {
        uint32_t wa, wb;
        uint64_t za, zb;

        wa = a[u];
        wb = b[u];
        za = wa * (uint64_t)xa + wb * (uint64_t)xb + (uint64_t)cca;
        zb = wa * (uint64_t)ya + wb * (uint64_t)yb + (uint64_t)ccb;

        /*
         * Results are stored one word down: this is the division
         * by 2^31 (the low word of the result is dropped).
         */
        if (u > 0) {
            a[u - 1] = (uint32_t)za & 0x7FFFFFFF;
            b[u - 1] = (uint32_t)zb & 0x7FFFFFFF;
        }

        /*
         * The carries are signed: za and zb are reinterpreted as
         * signed 64-bit values before being shifted.
         */
        cca = *(int64_t *)&za >> 31;
        ccb = *(int64_t *)&zb >> 31;
    }
    a[len - 1] = (uint32_t)cca;
    b[len - 1] = (uint32_t)ccb;

    /*
     * The sign of each result is the sign of its final carry;
     * negative results are replaced with their opposite.
     */
    nega = (uint32_t)((uint64_t)cca >> 63);
    negb = (uint32_t)((uint64_t)ccb >> 63);
    zint_negate(a, len, nega);
    zint_negate(b, len, negb);
    return nega | (negb << 1);
}

/*
 * Finish modular reduction.
 */
/*
 * zint_finish_mod(): rules on input parameters:
 *
 *   if neg = 1, then -m <= a < 0
 *   if neg = 0, then 0 <= a < 2*m
 *
 * If neg = 0, then the top word of a[] is allowed to use 32 bits.
 *
 * Modulus m must be odd.
 */
static void
zint_finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg) {
    uint32_t borrow, neg_mask, use_mask;
    size_t i;

    /*
     * First pass: compare a (assumed nonnegative) with m. Note that
     * if the top word uses 32 bits, subtracting m must yield a
     * value less than 2^31 since a < 2*m.
     */
    borrow = 0;
    for (i = 0; i < len; i ++) {
        borrow = (a[i] - m[i] - borrow) >> 31;
    }

    /*
     * If neg = 1 then we must add m (regardless of borrow)
     * If neg = 0 and borrow = 0 then we must subtract m
     * If neg = 0 and borrow = 1 then we must do nothing
     *
     * In the loop below, we conditionally subtract either m or -m
     * from a. neg_mask turns a word of m into a word of -m when
     * neg = 1; use_mask is 0 when neg = 0 and borrow = 1, which
     * forces the subtracted word to 0.
     */
    neg_mask = -neg >> 1;
    use_mask = -(neg | (1 - borrow));
    borrow = neg;
    for (i = 0; i < len; i ++) {
        uint32_t w = a[i] - ((m[i] ^ neg_mask) & use_mask) - borrow;

        a[i] = w & 0x7FFFFFFF;
        borrow = w >> 31;
    }
}

/*
 * Replace a with (a*xa+b*xb)/(2^31) mod m, and b with
 * (a*ya+b*yb)/(2^31) mod m. Modulus m must be odd; m0i = -1/m[0] mod 2^31.
 */
static void
zint_co_reduce_mod(uint32_t *a, uint32_t *b, const uint32_t *m, size_t len,
    uint32_t m0i, int64_t xa, int64_t xb, int64_t ya, int64_t yb) {
    size_t u;
    int64_t cca, ccb;
    uint32_t fa, fb;

    /*
     * These are actually four combined Montgomery multiplications.
     */
    cca = 0;
    ccb = 0;
    fa = ((a[0] * (uint32_t)xa + b[0] * (uint32_t)xb) * m0i) & 0x7FFFFFFF;
    fb = ((a[0] * (uint32_t)ya + b[0] * (uint32_t)yb) * m0i) & 0x7FFFFFFF;
    for (u = 0; u < len; u ++) {
        uint32_t wa, wb;
        uint64_t za, zb;

        wa = a[u];
        wb = b[u];
        za = wa * (uint64_t)xa + wb * (uint64_t)xb
             + m[u] * (uint64_t)fa + (uint64_t)cca;
        zb = wa * (uint64_t)ya + wb * (uint64_t)yb
             + m[u] * (uint64_t)fb + (uint64_t)ccb;
        if (u > 0) {
            a[u - 1] = (uint32_t)za & 0x7FFFFFFF;
            b[u - 1] = (uint32_t)zb & 0x7FFFFFFF;
        }
        cca = *(int64_t *)&za >> 31;
        ccb = *(int64_t *)&zb >> 31;
    }
    a[len - 1] = (uint32_t)cca;
    b[len - 1] = (uint32_t)ccb;

    /*
     * At this point:
     *   -m <= a < 2*m
     *   -m <= b < 2*m
     * (this is a case of Montgomery reduction)
     * The top words of 'a' and 'b' may have a 32-th bit set.
     * We want to add or subtract the modulus, as required.
     */
    zint_finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63));
    zint_finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63));
}

/*
 * Compute a GCD between two positive big integers x and y. The two
 * integers must be odd. Returned value is 1 if the GCD is 1, 0
 * otherwise. When 1 is returned, arrays u and v are filled with values
 * such that:
 *   0 <= u <= y
 *   0 <= v <= x
 *   x*u - y*v = 1
 * x[] and y[] are unmodified. Both input values must have the same
 * encoded length. Temporary array must be large enough to accommodate 4
 * extra values of that length. Arrays u, v and tmp may not overlap with
 * each other, or with either x or y.
 */
static int
zint_bezout(uint32_t *u, uint32_t *v,
    const uint32_t *x, const uint32_t *y,
    size_t len, uint32_t *tmp) {
    /*
     * Algorithm is an extended binary GCD. We maintain 6 values
     * a, b, u0, u1, v0 and v1 with the following invariants:
     *
     *  a = x*u0 - y*v0
     *  b = x*u1 - y*v1
     *  0 <= a <= x
     *  0 <= b <= y
     *  0 <= u0 < y
     *  0 <= v0 < x
     *  0 <= u1 <= y
     *  0 <= v1 < x
     *
     * Initial values are:
     *
     *  a = x   u0 = 1   v0 = 0
     *  b = y   u1 = y   v1 = x-1
     *
     * Each iteration reduces either a or b, and maintains the
     * invariants. Algorithm stops when a = b, at which point their
     * common value is GCD(a,b) and (u0,v0) (or (u1,v1)) contains
     * the values (u,v) we want to return.
     *
     * The formal definition of the algorithm is a sequence of steps:
     *
     *  - If a is even, then:
     *        a <- a/2
     *        u0 <- u0/2 mod y
     *        v0 <- v0/2 mod x
     *
     *  - Otherwise, if b is even, then:
     *        b <- b/2
     *        u1 <- u1/2 mod y
     *        v1 <- v1/2 mod x
     *
     *  - Otherwise, if a > b, then:
     *        a <- (a-b)/2
     *        u0 <- (u0-u1)/2 mod y
     *        v0 <- (v0-v1)/2 mod x
     *
     *  - Otherwise:
     *        b <- (b-a)/2
     *        u1 <- (u1-u0)/2 mod y
     *        v1 <- (v1-v0)/2 mod x
     *
     * We can show that the operations above preserve the invariants:
     *
     *  - If a is even, then u0 and v0 are either both even or both
     *    odd (since a = x*u0 - y*v0, and x and y are both odd).
     *    If u0 and v0 are both even, then (u0,v0) <- (u0/2,v0/2).
     *    Otherwise, (u0,v0) <- ((u0+y)/2,(v0+x)/2). Either way,
     *    the a = x*u0 - y*v0 invariant is preserved.
     *
     *  - The same holds for the case where b is even.
     *
     *  - If a and b are odd, and a > b, then:
     *
     *      a-b = x*(u0-u1) - y*(v0-v1)
     *
     *    In that situation, if u0 < u1, then x*(u0-u1) < 0, but
     *    a-b > 0; therefore, it must be that v0 < v1, and the
     *    first part of the update is: (u0,v0) <- (u0-u1+y,v0-v1+x),
     *    which preserves the invariants. Otherwise, if u0 > u1,
     *    then u0-u1 >= 1, thus x*(u0-u1) >= x. But a <= x and
     *    b >= 0, hence a-b <= x. It follows that, in that case,
     *    v0-v1 >= 0. The first part of the update is then:
     *    (u0,v0) <- (u0-u1,v0-v1), which again preserves the
     *    invariants.
     *
     *    Either way, once the subtraction is done, the new value of
     *    a, which is the difference of two odd values, is even,
     *    and the remaining of this step is a subcase of the
     *    first algorithm case (i.e. when a is even).
     *
     *  - If a and b are odd, and b > a, then a similar
     *    argument holds.
     *
     * The values a and b start at x and y, respectively. Since x
     * and y are odd, their GCD is odd, and it is easily seen that
     * all steps conserve the GCD (GCD(a-b,b) = GCD(a, b);
     * GCD(a/2,b) = GCD(a,b) if GCD(a,b) is odd). Moreover, either a
     * or b is reduced by at least one bit at each iteration, so
     * the algorithm necessarily converges on the case a = b, at
     * which point the common value is the GCD.
     *
     * In the algorithm expressed above, when a = b, the fourth case
     * applies, and sets b = 0. Since a contains the GCD of x and y,
     * which are both odd, a must be odd, and subsequent iterations
     * (if any) will simply divide b by 2 repeatedly, which has no
     * consequence. Thus, the algorithm can run for more iterations
     * than necessary; the final GCD will be in a, and the (u,v)
     * coefficients will be (u0,v0).
     *
     *
     * The presentation above is bit-by-bit. It can be sped up by
     * noticing that all decisions are taken based on the low bits
     * and high bits of a and b. We can extract the two top words
     * and low word of each of a and b, and compute reduction
     * parameters pa, pb, qa and qb such that the new values for
     * a and b are:
     *    a' = (a*pa + b*pb) / (2^31)
     *    b' = (a*qa + b*qb) / (2^31)
     * the two divisions being exact. The coefficients are obtained
     * just from the extracted words, and may be slightly off, requiring
     * an optional correction: if a' < 0, then we replace pa with -pa
     * and pb with -pb. Each such step will reduce the total length
     * (sum of lengths of a and b) by at least 30 bits at each
     * iteration.
     */
    uint32_t *u0, *u1, *v0, *v1, *a, *b;
    uint32_t x0i, y0i;
    uint32_t num, rc;
    size_t j;

    if (len == 0) {
        return 0;
    }

    /*
     * u0 and v0 are the u and v result buffers; the four other
     * values (u1, v1, a and b) are taken from tmp[].
     */
    u0 = u;
    v0 = v;
    u1 = tmp;
    v1 = u1 + len;
    a = v1 + len;
    b = a + len;

    /*
     * We'll need the Montgomery reduction coefficients.
     */
    x0i = modp_ninv31(x[0]);
    y0i = modp_ninv31(y[0]);

    /*
     * Initialize a, b, u0, u1, v0 and v1.
     *  a = x   u0 = 1   v0 = 0
     *  b = y   u1 = y   v1 = x-1
     * Note that x is odd, so computing x-1 is easy.
     */
    memcpy(a, x, len * sizeof * x);
    memcpy(b, y, len * sizeof * y);
    u0[0] = 1;
    memset(u0 + 1, 0, (len - 1) * sizeof * u0);
    memset(v0, 0, len * sizeof * v0);
    memcpy(u1, y, len * sizeof * u1);
    memcpy(v1, x, len * sizeof * v1);
    v1[0] --;

    /*
     * Each input operand may be as large as 31*len bits, and we
     * reduce the total length by at least 30 bits at each iteration.
     */
    for (num = 62 * (uint32_t)len + 30; num >= 30; num -= 30) {
        uint32_t c0, c1;
        uint32_t a0, a1, b0, b1;
        uint64_t a_hi, b_hi;
        uint32_t a_lo, b_lo;
        int64_t pa, pb, qa, qb;
        int i;
        uint32_t r;

        /*
         * Extract the top words of a and b. If j is the highest
         * index >= 1 such that a[j] != 0 or b[j] != 0, then we
         * want (a[j] << 31) + a[j-1] and (b[j] << 31) + b[j-1].
         * If a and b are down to one word each, then we use
         * a[0] and b[0].
         */
        c0 = (uint32_t) -1;
        c1 = (uint32_t) -1;
        a0 = 0;
        a1 = 0;
        b0 = 0;
        b1 = 0;
        j = len;
        while (j -- > 0) {
            uint32_t aw, bw;

            aw = a[j];
            bw = b[j];
            a0 ^= (a0 ^ aw) & c0;
            a1 ^= (a1 ^ aw) & c1;
            b0 ^= (b0 ^ bw) & c0;
            b1 ^= (b1 ^ bw) & c1;
            c1 = c0;
            c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1;
        }

        /*
         * If c1 = 0, then we grabbed two words for a and b.
         * If c1 != 0 but c0 = 0, then we grabbed one word. It
         * is not possible that c1 != 0 and c0 != 0, because that
         * would mean that both integers are zero.
         */
        a1 |= a0 & c1;
        a0 &= ~c1;
        b1 |= b0 & c1;
        b0 &= ~c1;
        a_hi = ((uint64_t)a0 << 31) + a1;
        b_hi = ((uint64_t)b0 << 31) + b1;
        a_lo = a[0];
        b_lo = b[0];

        /*
         * Compute reduction factors:
         *
         *   a' = a*pa + b*pb
         *   b' = a*qa + b*qb
         *
         * such that a' and b' are both multiple of 2^31, but are
         * only marginally larger than a and b.
         */
        pa = 1;
        pb = 0;
        qa = 0;
        qb = 1;
        for (i = 0; i < 31; i ++) {
            /*
             * At each iteration:
             *
             *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
             *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
             *   a <- a/2 if: a is even
             *   b <- b/2 if: a is odd, b is even
             *
             * We multiply a_lo and b_lo by 2 at each
             * iteration, thus a division by 2 really is a
             * non-multiplication by 2.
             */
            uint32_t rt, oa, ob, cAB, cBA, cA;
            uint64_t rz;

            /*
             * rt = 1 if a_hi > b_hi, 0 otherwise.
             */
            rz = b_hi - a_hi;
            rt = (uint32_t)((rz ^ ((a_hi ^ b_hi)
                                   & (a_hi ^ rz))) >> 63);

            /*
             * cAB = 1 if b must be subtracted from a
             * cBA = 1 if a must be subtracted from b
             * cA = 1 if a must be divided by 2
             *
             * Rules:
             *
             *   cAB and cBA cannot both be 1.
             *   If a is not divided by 2, b is.
             */
            oa = (a_lo >> i) & 1;
            ob = (b_lo >> i) & 1;
            cAB = oa & ob & rt;
            cBA = oa & ob & ~rt;
            cA = cAB | (oa ^ 1);

            /*
             * Conditional subtractions.
             */
            a_lo -= b_lo & -cAB;
            a_hi -= b_hi & -(uint64_t)cAB;
            pa -= qa & -(int64_t)cAB;
            pb -= qb & -(int64_t)cAB;
            b_lo -= a_lo & -cBA;
            b_hi -= a_hi & -(uint64_t)cBA;
            qa -= pa & -(int64_t)cBA;
            qb -= pb & -(int64_t)cBA;

            /*
             * Shifting.
             */
            a_lo += a_lo & (cA - 1);
            pa += pa & ((int64_t)cA - 1);
            pb += pb & ((int64_t)cA - 1);
            a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA;
            b_lo += b_lo & -cA;
            qa += qa & -(int64_t)cA;
            qb += qb & -(int64_t)cA;
            b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1);
        }

        /*
         * Apply the computed parameters to our values. We
         * may have to correct pa and pb depending on the
         * returned value of zint_co_reduce() (when a and/or b
         * had to be negated).
         */
        r = zint_co_reduce(a, b, len, pa, pb, qa, qb);
        pa -= (pa + pa) & -(int64_t)(r & 1);
        pb -= (pb + pb) & -(int64_t)(r & 1);
        qa -= (qa + qa) & -(int64_t)(r >> 1);
        qb -= (qb + qb) & -(int64_t)(r >> 1);
        zint_co_reduce_mod(u0, u1, y, len, y0i, pa, pb, qa, qb);
        zint_co_reduce_mod(v0, v1, x, len, x0i, pa, pb, qa, qb);
    }

    /*
     * At that point, array a[] should contain the GCD, and the
     * results (u,v) should already be set. We check that the GCD
     * is indeed 1. We also check that the two operands x and y
     * are odd.
     */
    rc = a[0] ^ 1;
    for (j = 1; j < len; j ++) {
        rc |= a[j];
    }
    return (int)((1 - ((rc | -rc) >> 31)) & x[0] & y[0]);
}

/*
 * Add k*y*2^sc to x. The result is assumed to fit in the array of
 * size xlen (truncation is applied if necessary).
 * Scale factor 'sc' is provided as sch and scl, such that:
 *   sch = sc / 31
 *   scl = sc % 31
 * xlen MUST NOT be lower than ylen.
 *
 * x[] and y[] are both signed integers, using two's complement for
 * negative values.
 */
static void
zint_add_scaled_mul_small(uint32_t *x, size_t xlen,
    const uint32_t *y, size_t ylen, int32_t k,
    uint32_t sch, uint32_t scl) {
    uint32_t ext, spill;
    int32_t carry;
    size_t i;

    if (ylen == 0) {
        return;
    }

    /* Sign-extension word of y: 0x7FFFFFFF if y is negative, else 0. */
    ext = -(y[ylen - 1] >> 30) >> 1;
    spill = 0;
    carry = 0;
    for (i = sch; i < xlen; i ++) {
        size_t j = i - sch;
        uint32_t wy, wys, ccu;
        uint64_t z;

        /*
         * Get the next word of y (scaled).
         */
        wy = (j < ylen) ? y[j] : ext;
        wys = ((wy << scl) & 0x7FFFFFFF) | spill;
        spill = wy >> (31 - scl);

        /*
         * The expression below does not overflow.
         */
        z = (uint64_t)((int64_t)wys * (int64_t)k + (int64_t)x[i] + carry);
        x[i] = (uint32_t)z & 0x7FFFFFFF;

        /*
         * Right-shifting the signed value z would yield
         * implementation-defined results (arithmetic shift is
         * not guaranteed). However, we can cast to unsigned,
         * and get the next carry as an unsigned word. We can
         * then convert it back to signed by using the guaranteed
         * fact that 'int32_t' uses two's complement with no
         * trap representation or padding bit, and with a layout
         * compatible with that of 'uint32_t'.
         */
        ccu = (uint32_t)(z >> 31);
        carry = *(int32_t *)&ccu;
    }
}

/*
 * Subtract y*2^sc from x. The result is assumed to fit in the array of
 * size xlen (truncation is applied if necessary).
 * Scale factor 'sc' is provided as sch and scl, such that:
 *   sch = sc / 31
 *   scl = sc % 31
 * xlen MUST NOT be lower than ylen.
 *
 * x[] and y[] are both signed integers, using two's complement for
 * negative values.
 */
static void
zint_sub_scaled(uint32_t *x, size_t xlen,
    const uint32_t *y, size_t ylen, uint32_t sch, uint32_t scl) {
    uint32_t ext, spill, borrow;
    size_t i;

    if (ylen == 0) {
        return;
    }

    /* Sign-extension word of y: 0x7FFFFFFF if y is negative, else 0. */
    ext = -(y[ylen - 1] >> 30) >> 1;
    spill = 0;
    borrow = 0;
    for (i = sch; i < xlen; i ++) {
        size_t j = i - sch;
        uint32_t wy, wys, w;

        /*
         * Get the next word of y (scaled).
         */
        wy = (j < ylen) ? y[j] : ext;
        wys = ((wy << scl) & 0x7FFFFFFF) | spill;
        spill = wy >> (31 - scl);

        w = x[i] - wys - borrow;
        x[i] = w & 0x7FFFFFFF;
        borrow = w >> 31;
    }
}

/*
 * Convert a one-word signed big integer into a signed value.
 */
+ */ +static inline int32_t +zint_one_to_plain(const uint32_t *x) { + uint32_t w; + + w = x[0]; + w |= (w & 0x40000000) << 1; + return *(int32_t *)&w; +} + +/* ==================================================================== */ + +/* + * Convert a polynomial to floating-point values. + * + * Each coefficient has length flen words, and starts fstride words after + * the previous. + * + * IEEE-754 binary64 values can represent values in a finite range, + * roughly 2^(-1023) to 2^(+1023); thus, if coefficients are too large, + * they should be "trimmed" by pointing not to the lowest word of each, + * but upper. + */ +static void +poly_big_to_fp(fpr *d, const uint32_t *f, size_t flen, size_t fstride, + unsigned logn) { + size_t n, u; + + n = MKN(logn); + if (flen == 0) { + for (u = 0; u < n; u ++) { + d[u] = fpr_zero; + } + return; + } + for (u = 0; u < n; u ++, f += fstride) { + size_t v; + uint32_t neg, cc, xm; + fpr x, fsc; + + /* + * Get sign of the integer; if it is negative, then we + * will load its absolute value instead, and negate the + * result. + */ + neg = -(f[flen - 1] >> 30); + xm = neg >> 1; + cc = neg & 1; + x = fpr_zero; + fsc = fpr_one; + for (v = 0; v < flen; v ++, fsc = fpr_mul(fsc, fpr_ptwo31)) { + uint32_t w; + + w = (f[v] ^ xm) + cc; + cc = w >> 31; + w &= 0x7FFFFFFF; + w -= (w << 1) & neg; + x = fpr_add(x, fpr_mul(fpr_of(*(int32_t *)&w), fsc)); + } + d[u] = x; + } +} + +/* + * Convert a polynomial to small integers. Source values are supposed + * to be one-word integers, signed over 31 bits. Returned value is 0 + * if any of the coefficients exceeds the provided limit (in absolute + * value), or 1 on success. + * + * This is not constant-time; this is not a problem here, because on + * any failure, the NTRU-solving process will be deemed to have failed + * and the (f,g) polynomials will be discarded. 
/*
 * poly_big_to_small(): each of the 2^logn one-word source values is
 * converted with zint_one_to_plain() and stored in d[]; 0 is returned
 * as soon as a value falls outside of -lim..+lim, 1 otherwise.
 */
static int
poly_big_to_small(int8_t *d, const uint32_t *s, int lim, unsigned logn) {
    const size_t n = MKN(logn);
    size_t i;

    for (i = 0; i < n; i ++) {
        int32_t z = zint_one_to_plain(s + i);

        if (z < -lim || z > lim) {
            return 0;
        }
        d[i] = (int8_t)z;
    }
    return 1;
}

/*
 * Subtract k*f from F, where F, f and k are polynomials modulo X^N+1.
 * Coefficients of polynomial k are small integers (signed values in the
 * -2^31..2^31 range) scaled by 2^sc. Value sc is provided as sch = sc / 31
 * and scl = sc % 31.
 *
 * This function implements the basic quadratic multiplication algorithm,
 * which is efficient in space (no extra buffer needed) but slow at
 * high degree.
 */
static void
poly_sub_scaled(uint32_t *F, size_t Flen, size_t Fstride,
    const uint32_t *f, size_t flen, size_t fstride,
    const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn) {
    const size_t n = MKN(logn);
    size_t row;

    for (row = 0; row < n; row ++) {
        /* Subtracting k[row]*f is done by adding (-k[row])*f. */
        int32_t kf = -k[row];
        uint32_t *dst = F + row * Fstride;
        const uint32_t *src = f;
        size_t col;

        for (col = 0; col < n; col ++) {
            zint_add_scaled_mul_small(
                dst, Flen, src, flen, kf, sch, scl);
            if (row + col == n - 1) {
                /*
                 * Wrap around to the first coefficient of F;
                 * the sign flips since X^N = -1.
                 */
                dst = F;
                kf = -kf;
            } else {
                dst += Fstride;
            }
            src += fstride;
        }
    }
}

/*
 * Subtract k*f from F. Coefficients of polynomial k are small integers
 * (signed values in the -2^31..2^31 range) scaled by 2^sc. This function
 * assumes that the degree is large, and integers relatively small.
 * The value sc is provided as sch = sc / 31 and scl = sc % 31.
 */
+ */
+static void
+poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride,
+        const uint32_t *f, size_t flen, size_t fstride,
+        const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn,
+        uint32_t *tmp) {
+    uint32_t *gm, *igm, *fk, *t1, *x;
+    const uint32_t *y;
+    size_t n, u, tlen;
+    const small_prime *primes;
+
+    n = MKN(logn);
+    tlen = flen + 1; /* k*f is computed over one more word/prime than f */
+    gm = tmp; /* tmp[] layout: gm (n), igm (n), fk (n * tlen), then t1 */
+    igm = gm + MKN(logn);
+    fk = igm + MKN(logn);
+    t1 = fk + n * tlen;
+
+    primes = PRIMES;
+
+    /*
+     * Compute k*f in fk[], in RNS notation.
+     *
+     * For each of the first tlen primes p:
+     *  - k is reduced modulo p into t1[] and transformed (modp_NTT2);
+     *  - each coefficient of f (flen words, stride fstride) is reduced
+     *    modulo p into word u of the matching fk[] entry (stride tlen),
+     *    and that column is transformed (modp_NTT2_ext);
+     *  - the two are multiplied pointwise, the product being written
+     *    back into fk[];
+     *  - the column is transformed back (modp_iNTT2_ext).
+     * NOTE(review): the extra modp_montymul() by R2 presumably cancels
+     * the Montgomery factor left by the first multiplication; confirm
+     * against the definition of modp_montymul().
+     */
+    for (u = 0; u < tlen; u ++) {
+        uint32_t p, p0i, R2, Rx;
+        size_t v;
+
+        p = primes[u].p;
+        p0i = modp_ninv31(p);
+        R2 = modp_R2(p, p0i);
+        Rx = modp_Rx((unsigned)flen, p, p0i, R2);
+        modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i);
+
+        for (v = 0; v < n; v ++) {
+            t1[v] = modp_set(k[v], p);
+        }
+        modp_NTT2(t1, gm, logn, p, p0i);
+        for (v = 0, y = f, x = fk + u;
+                v < n; v ++, y += fstride, x += tlen) {
+            *x = zint_mod_small_signed(y, flen, p, p0i, R2, Rx);
+        }
+        modp_NTT2_ext(fk + u, tlen, gm, logn, p, p0i);
+        for (v = 0, x = fk + u; v < n; v ++, x += tlen) {
+            *x = modp_montymul(
+                     modp_montymul(t1[v], *x, p, p0i), R2, p, p0i);
+        }
+        modp_iNTT2_ext(fk + u, tlen, igm, logn, p, p0i);
+    }
+
+    /*
+     * Rebuild k*f: the n values of fk[] (tlen words each) are converted
+     * in place by zint_rebuild_CRT(), with t1 as scratch space.
+     */
+    zint_rebuild_CRT(fk, tlen, tlen, n, primes, 1, t1);
+
+    /*
+     * Subtract k*f, scaled, from F (one coefficient at a time; F uses
+     * stride Fstride, fk uses stride tlen).
+     */
+    for (u = 0, x = F, y = fk; u < n; u ++, x += Fstride, y += tlen) {
+        zint_sub_scaled(x, Flen, y, tlen, sch, scl);
+    }
+}
+
+/* ==================================================================== */
+
+
+#define RNG_CONTEXT inner_shake256_context
+
+/*
+ * Get a random 8-byte integer from a SHAKE-based RNG. This function
+ * ensures consistent interpretation of the SHAKE output so that
+ * the same values will be obtained over different platforms, in case
+ * a known seed is used.
+ */
+static inline uint64_t
+get_rng_u64(inner_shake256_context *rng) {
+    /*
+     * We enforce little-endian representation: the first extracted
+     * byte (tmp[0]) becomes the least significant byte of the result,
+     * regardless of the byte order of the host.
+     */
+
+    uint8_t tmp[8];
+
+    inner_shake256_extract(rng, tmp, sizeof tmp);
+    return (uint64_t)tmp[0]
+           | ((uint64_t)tmp[1] << 8)
+           | ((uint64_t)tmp[2] << 16)
+           | ((uint64_t)tmp[3] << 24)
+           | ((uint64_t)tmp[4] << 32)
+           | ((uint64_t)tmp[5] << 40)
+           | ((uint64_t)tmp[6] << 48)
+           | ((uint64_t)tmp[7] << 56);
+}
+
+/*
+ * Table below incarnates a discrete Gaussian distribution:
+ *    D(x) = exp(-(x^2)/(2*sigma^2))
+ * where sigma = 1.17*sqrt(q/(2*N)), q = 12289, and N = 1024.
+ * Element 0 of the table is P(x = 0).
+ * For k > 0, element k is P(x >= k+1 | x > 0).
+ * Probabilities are scaled up by 2^63.
+ *
+ * The table has 27 entries; the last one is 0. All entries are lower
+ * than 2^63, which is what lets mkgauss() compare them with a 63-bit
+ * random value by looking at the top bit of a 64-bit difference.
+ */
+static const uint64_t gauss_1024_12289[] = {
+    1283868770400643928u,  6416574995475331444u,  4078260278032692663u,
+    2353523259288686585u,  1227179971273316331u,   575931623374121527u,
+    242543240509105209u,    91437049221049666u,    30799446349977173u,
+    9255276791179340u,     2478152334826140u,      590642893610164u,
+    125206034929641u,       23590435911403u,        3948334035941u,
+    586753615614u,          77391054539u,           9056793210u,
+    940121950u,             86539696u,              7062824u,
+    510971u,                32764u,                 1862u,
+    94u,                    4u,                     0u
+};
+
+/*
+ * Generate a random value with a Gaussian distribution centered on 0.
+ * The RNG must be ready for extraction (already flipped).
+ *
+ * Distribution has standard deviation 1.17*sqrt(q/(2*N)). The
+ * precomputed table is for N = 1024. Since the sum of two independent
+ * values of standard deviation sigma has standard deviation
+ * sigma*sqrt(2), then we can just generate more values and add them
+ * together for lower dimensions.
+ *
+ * Concretely, g = 2^(10-logn) table-based samples are added together,
+ * so this code expects logn <= 10 (a larger logn would make the shift
+ * count below invalid). Each sample consumes two 64-bit words from
+ * the RNG.
+ */
+static int
+mkgauss(RNG_CONTEXT *rng, unsigned logn) {
+    unsigned u, g;
+    int val;
+
+    g = 1U << (10 - logn);
+    val = 0;
+    for (u = 0; u < g; u ++) {
+        /*
+         * Each iteration generates one value with the
+         * Gaussian distribution for N = 1024.
+         *
+         * We use two random 64-bit values. First value
+         * decides on whether the generated value is 0, and,
+         * if not, the sign of the value. Second random 64-bit
+         * word is used to generate the non-zero value.
+         *
+         * For constant-time code we have to read the complete
+         * table. This has negligible cost, compared with the
+         * remainder of the keygen process (solving the NTRU
+         * equation).
+         */
+        uint64_t r;
+        uint32_t f, v, k, neg;
+
+        /*
+         * First value:
+         *  - flag 'neg' is randomly selected to be 0 or 1.
+         *  - flag 'f' is set to 1 if the generated value is zero,
+         *    or set to 0 otherwise.
+         */
+        r = get_rng_u64(rng);
+        neg = (uint32_t)(r >> 63);
+        r &= ~((uint64_t)1 << 63);
+        f = (uint32_t)((r - gauss_1024_12289[0]) >> 63); /* 1 iff r is
+                         * lower than element 0 (both are below 2^63, so
+                         * the top bit of the difference is the borrow) */
+
+        /*
+         * We produce a new random 63-bit integer r, and go over
+         * the array, starting at index 1. We store in v the
+         * index of the first array element which is not greater
+         * than r, unless the flag f was already 1.
+         */
+        v = 0;
+        r = get_rng_u64(rng);
+        r &= ~((uint64_t)1 << 63);
+        for (k = 1; k < (uint32_t)((sizeof gauss_1024_12289)
+                                   / (sizeof gauss_1024_12289[0])); k ++) {
+            uint32_t t;
+
+            t = (uint32_t)((r - gauss_1024_12289[k]) >> 63) ^ 1; /* 1 iff
+                         * element k is not greater than r */
+            v |= k & -(t & (f ^ 1)); /* record k only while f is still 0,
+                         * i.e. for the first match; mask is all-ones or 0 */
+            f |= t; /* once set, no later index can be recorded */
+        }
+
+        /*
+         * We apply the sign ('neg' flag). If the value is zero,
+         * the sign has no effect.
+         * (v ^ -neg) + neg is the two's complement negation of v when
+         * neg = 1, and leaves v unchanged when neg = 0.
+         */
+        v = (v ^ -neg) + neg;
+
+        /*
+         * Generated value is added to val. The 32 bits of v are read
+         * back as a signed (int32_t) value.
+         */
+        val += *(int32_t *)&v;
+    }
+    return val;
+}
+
+/*
+ * The MAX_BL_SMALL[] and MAX_BL_LARGE[] contain the lengths, in 31-bit
+ * words, of intermediate values in the computation:
+ *
+ * MAX_BL_SMALL[depth]: length for the input f and g at that depth
+ * MAX_BL_LARGE[depth]: length for the unreduced F and G at that depth
+ *
+ * Rules:
+ *
+ * - Within an array, values grow.
+ *
+ * - The 'SMALL' array must have an entry for maximum depth, corresponding
+ * to the size of values used in the binary GCD. There is no such value
+ * for the 'LARGE' array (the binary GCD yields already reduced
+ * coefficients).
+ *
+ * - MAX_BL_LARGE[depth] >= MAX_BL_SMALL[depth + 1].
+ *
+ * - Values must be large enough to handle the common cases, with some
+ * margins.
+ * + * - Values must not be "too large" either because we will convert some + * integers into floating-point values by considering the top 10 words, + * i.e. 310 bits; hence, for values of length more than 10 words, we + * should take care to have the length centered on the expected size. + * + * The following average lengths, in bits, have been measured on thousands + * of random keys (fg = max length of the absolute value of coefficients + * of f and g at that depth; FG = idem for the unreduced F and G; for the + * maximum depth, F and G are the output of binary GCD, multiplied by q; + * for each value, the average and standard deviation are provided). + * + * Binary case: + * depth: 10 fg: 6307.52 (24.48) FG: 6319.66 (24.51) + * depth: 9 fg: 3138.35 (12.25) FG: 9403.29 (27.55) + * depth: 8 fg: 1576.87 ( 7.49) FG: 4703.30 (14.77) + * depth: 7 fg: 794.17 ( 4.98) FG: 2361.84 ( 9.31) + * depth: 6 fg: 400.67 ( 3.10) FG: 1188.68 ( 6.04) + * depth: 5 fg: 202.22 ( 1.87) FG: 599.81 ( 3.87) + * depth: 4 fg: 101.62 ( 1.02) FG: 303.49 ( 2.38) + * depth: 3 fg: 50.37 ( 0.53) FG: 153.65 ( 1.39) + * depth: 2 fg: 24.07 ( 0.25) FG: 78.20 ( 0.73) + * depth: 1 fg: 10.99 ( 0.08) FG: 39.82 ( 0.41) + * depth: 0 fg: 4.00 ( 0.00) FG: 19.61 ( 0.49) + * + * Integers are actually represented either in binary notation over + * 31-bit words (signed, using two's complement), or in RNS, modulo + * many small primes. These small primes are close to, but slightly + * lower than, 2^31. Use of RNS loses less than two bits, even for + * the largest values. + * + * IMPORTANT: if these values are modified, then the temporary buffer + * sizes (FALCON_KEYGEN_TEMP_*, in inner.h) must be recomputed + * accordingly. 
+ */
+
+static const size_t MAX_BL_SMALL[] = {
+    1, 1, 2, 2, 4, 7, 14, 27, 53, 106, 209
+}; /* 11 entries, indexed by depth (0 to 10) */
+
+static const size_t MAX_BL_LARGE[] = {
+    2, 2, 5, 7, 12, 21, 40, 78, 157, 308
+}; /* 10 entries, indexed by depth (0 to 9) */
+
+/*
+ * Average and standard deviation for the maximum size (in bits) of
+ * coefficients of (f,g), depending on depth. These values are used
+ * to compute bounds for Babai's reduction.
+ *
+ * The table is indexed by depth (11 entries, depth 0 to 10);
+ * solve_NTRU_intermediate() derives its minimum and maximum expected
+ * bit lengths as avg - 6*std and avg + 6*std.
+ */
+static const struct {
+    int avg;
+    int std;
+} BITLENGTH[] = {
+    {    4,  0 },
+    {   11,  1 },
+    {   24,  1 },
+    {   50,  1 },
+    {  102,  1 },
+    {  202,  2 },
+    {  401,  4 },
+    {  794,  5 },
+    { 1577,  8 },
+    { 3138, 13 },
+    { 6308, 25 }
+};
+
+/*
+ * Minimal recursion depth at which we rebuild intermediate values
+ * when reconstructing f and g.
+ *
+ * NOTE(review): the use visible in this part of the file is in
+ * solve_NTRU_intermediate(), where depth <= DEPTH_INT_FG selects
+ * poly_sub_scaled_ntt() and deeper levels use poly_sub_scaled().
+ */
+#define DEPTH_INT_FG 4
+
+/*
+ * Compute squared norm of a short vector. Returned value is saturated to
+ * 2^32-1 if it is not lower than 2^31.
+ *
+ * The vector has MKN(logn) coefficients. Saturation works as follows:
+ * 'ng' accumulates (bitwise OR) every partial sum, so its top bit is
+ * set as soon as one partial sum reaches 2^31; -(ng >> 31) is then
+ * all-ones and forces the returned value to 2^32-1. There is no
+ * data-dependent branch.
+ */
+static uint32_t
+poly_small_sqnorm(const int8_t *f, unsigned logn) {
+    size_t n, u;
+    uint32_t s, ng;
+
+    n = MKN(logn);
+    s = 0;
+    ng = 0;
+    for (u = 0; u < n; u ++) {
+        int32_t z;
+
+        z = f[u];
+        s += (uint32_t)(z * z); /* z is an int8_t value: z*z <= 16384 */
+        ng |= s;
+    }
+    return s | -(ng >> 31);
+}
+
+/*
+ * Align (upwards) the provided 'data' pointer with regards to 'base'
+ * so that the offset is a multiple of the size of 'fpr'.
+ *
+ * The offset is computed as (data - base) on byte pointers and cast to
+ * size_t, so 'data' is expected to point into the same buffer as
+ * 'base', at or after it. The returned pointer is base + rounded
+ * offset; it is only properly aligned if 'base' itself is.
+ */
+static fpr *
+align_fpr(void *base, void *data) {
+    uint8_t *cb, *cd;
+    size_t k, km;
+
+    cb = base;
+    cd = data;
+    k = (size_t)(cd - cb);
+    km = k % sizeof(fpr);
+    if (km) {
+        k += (sizeof(fpr)) - km; /* round up to the next multiple */
+    }
+    return (fpr *)(cb + k);
+}
+
+/*
+ * Align (upwards) the provided 'data' pointer with regards to 'base'
+ * so that the offset is a multiple of the size of 'uint32_t'.
+ *
+ * Same contract as align_fpr(): 'data' is expected to point into the
+ * buffer starting at 'base', at or after it.
+ */
+static uint32_t *
+align_u32(void *base, void *data) {
+    uint8_t *cb, *cd;
+    size_t k, km;
+
+    cb = base;
+    cd = data;
+    k = (size_t)(cd - cb);
+    km = k % sizeof(uint32_t);
+    if (km) {
+        k += (sizeof(uint32_t)) - km; /* round up to the next multiple */
+    }
+    return (uint32_t *)(cb + k);
+}
+
+/*
+ * Convert a small vector to floating point.
+ */
+static void
+poly_small_to_fp(fpr *x, const int8_t *f, unsigned logn) {
+    size_t n, u;
+
+    n = MKN(logn);
+    for (u = 0; u < n; u ++) {
+        x[u] = fpr_of(f[u]);
+    }
+}
+
+/*
+ * Input: f,g of degree N = 2^logn; 'depth' is used only to get their
+ * individual length.
+ *
+ * Output: f',g' of degree N/2, with the length for 'depth+1'.
+ *
+ * Values are in RNS; input and/or output may also be in NTT.
+ *
+ * Parameters:
+ *   data      on entry, f then g (n values each, slen words per value,
+ *             with slen = MAX_BL_SMALL[depth]); on exit, f' then g'
+ *             (n/2 values each, tlen words per value, with
+ *             tlen = MAX_BL_SMALL[depth + 1]). The area after the
+ *             output is used as working space: copies of f and g,
+ *             then gm, igm and t1 (n words each).
+ *   in_ntt    non-zero if the input words are already in NTT
+ *             representation.
+ *   out_ntt   non-zero to leave the output in NTT representation.
+ */
+static void
+make_fg_step(uint32_t *data, unsigned logn, unsigned depth,
+        int in_ntt, int out_ntt) {
+    size_t n, hn, u;
+    size_t slen, tlen;
+    uint32_t *fd, *gd, *fs, *gs, *gm, *igm, *t1;
+    const small_prime *primes;
+
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+    slen = MAX_BL_SMALL[depth];
+    tlen = MAX_BL_SMALL[depth + 1];
+    primes = PRIMES;
+
+    /*
+     * Prepare room for the result.
+     *
+     * The destination f' and g' (fd, gd: hn * tlen words each) take
+     * the start of data[]; the source f and g (2 * n * slen words) are
+     * moved right after them (fs, gs). memmove() is used because the
+     * source and destination areas may overlap.
+     */
+    fd = data;
+    gd = fd + hn * tlen;
+    fs = gd + hn * tlen;
+    gs = fs + n * slen;
+    gm = gs + n * slen;
+    igm = gm + n;
+    t1 = igm + n;
+    memmove(fs, data, 2 * n * slen * sizeof * data);
+
+    /*
+     * First slen words: we use the input values directly, and apply
+     * inverse NTT as we go.
+     *
+     * For each prime, word u of every source value is gathered into
+     * t1[] (and converted to NTT if the input was not). Each output
+     * value is the product of two consecutive t1[] entries. When the
+     * input was in NTT, the source column is then converted back in
+     * place, so that fs/gs end up in plain RNS.
+     */
+    for (u = 0; u < slen; u ++) {
+        uint32_t p, p0i, R2;
+        size_t v;
+        uint32_t *x;
+
+        p = primes[u].p;
+        p0i = modp_ninv31(p);
+        R2 = modp_R2(p, p0i);
+        modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i);
+
+        for (v = 0, x = fs + u; v < n; v ++, x += slen) {
+            t1[v] = *x;
+        }
+        if (!in_ntt) {
+            modp_NTT2(t1, gm, logn, p, p0i);
+        }
+        for (v = 0, x = fd + u; v < hn; v ++, x += tlen) {
+            uint32_t w0, w1;
+
+            w0 = t1[(v << 1) + 0];
+            w1 = t1[(v << 1) + 1];
+            *x = modp_montymul(
+                     modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+        }
+        if (in_ntt) {
+            modp_iNTT2_ext(fs + u, slen, igm, logn, p, p0i);
+        }
+
+        for (v = 0, x = gs + u; v < n; v ++, x += slen) {
+            t1[v] = *x;
+        }
+        if (!in_ntt) {
+            modp_NTT2(t1, gm, logn, p, p0i);
+        }
+        for (v = 0, x = gd + u; v < hn; v ++, x += tlen) {
+            uint32_t w0, w1;
+
+            w0 = t1[(v << 1) + 0];
+            w1 = t1[(v << 1) + 1];
+            *x = modp_montymul(
+                     modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+        }
+        if (in_ntt) {
+            modp_iNTT2_ext(gs + u, slen, igm, logn, p, p0i);
+        }
+
+        if (!out_ntt) {
+            modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i);
+            modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i);
+        }
+    }
+
+    /*
+     * Since the fs and gs words have been de-NTTized, we can use the
+     * CRT to rebuild the values. (gm is used as scratch space here; it
+     * is recomputed by modp_mkgm2() in the next loop.)
+     */
+    zint_rebuild_CRT(fs, slen, slen, n, primes, 1, gm);
+    zint_rebuild_CRT(gs, slen, slen, n, primes, 1, gm);
+
+    /*
+     * Remaining words: use modular reductions to extract the values.
+     *
+     * For primes slen to tlen-1 there is no stored residue, so each
+     * rebuilt source value is reduced modulo p, converted to NTT, and
+     * the output words are computed as in the first loop.
+     */
+    for (u = slen; u < tlen; u ++) {
+        uint32_t p, p0i, R2, Rx;
+        size_t v;
+        uint32_t *x;
+
+        p = primes[u].p;
+        p0i = modp_ninv31(p);
+        R2 = modp_R2(p, p0i);
+        Rx = modp_Rx((unsigned)slen, p, p0i, R2);
+        modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i);
+        for (v = 0, x = fs; v < n; v ++, x += slen) {
+            t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx);
+        }
+        modp_NTT2(t1, gm, logn, p, p0i);
+        for (v = 0, x = fd + u; v < hn; v ++, x += tlen) {
+            uint32_t w0, w1;
+
+            w0 = t1[(v << 1) + 0];
+            w1 = t1[(v << 1) + 1];
+            *x = modp_montymul(
+                     modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+        }
+        for (v = 0, x = gs; v < n; v ++, x += slen) {
+            t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx);
+        }
+        modp_NTT2(t1, gm, logn, p, p0i);
+        for (v = 0, x = gd + u; v < hn; v ++, x += tlen) {
+            uint32_t w0, w1;
+
+            w0 = t1[(v << 1) + 0];
+            w1 = t1[(v << 1) + 1];
+            *x = modp_montymul(
+                     modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+        }
+
+        if (!out_ntt) {
+            modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i);
+            modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i);
+        }
+    }
+}
+
+/*
+ * Compute f and g at a specific depth, in RNS notation.
+ *
+ * Returned values are stored in the data[] array, at slen words per integer.
+ *
+ * Conditions:
+ * 0 <= depth <= logn
+ *
+ * Space use in data[]: enough room for any two successive values (f', g',
+ * f and g).
+ */
+static void
+make_fg(uint32_t *data, const int8_t *f, const int8_t *g,
+        unsigned logn, unsigned depth, int out_ntt) {
+    size_t n, u;
+    uint32_t *ft, *gt, p0;
+    unsigned d;
+    const small_prime *primes;
+
+    n = MKN(logn);
+    ft = data; /* f modulo the first prime in data[0..n-1], g right after */
+    gt = ft + n;
+    primes = PRIMES;
+    p0 = primes[0].p;
+    for (u = 0; u < n; u ++) {
+        ft[u] = modp_set(f[u], p0);
+        gt[u] = modp_set(g[u], p0);
+    }
+
+    if (depth == 0 && out_ntt) { /* no halving step: only convert f and g
+                                  * to NTT, with tables built after g */
+        uint32_t *gm, *igm;
+        uint32_t p, p0i;
+
+        p = primes[0].p;
+        p0i = modp_ninv31(p);
+        gm = gt + n;
+        igm = gm + MKN(logn);
+        modp_mkgm2(gm, igm, logn, primes[0].g, p, p0i);
+        modp_NTT2(ft, gm, logn, p, p0i);
+        modp_NTT2(gt, gm, logn, p, p0i);
+        return;
+    }
+
+    if (depth == 0) {
+        return;
+    }
+    if (depth == 1) {
+        make_fg_step(data, logn, 0, 0, out_ntt);
+        return;
+    }
+    make_fg_step(data, logn, 0, 0, 1); /* first step: plain in, NTT out */
+    for (d = 1; d + 1 < depth; d ++) {
+        make_fg_step(data, logn - d, d, 1, 1); /* NTT in, NTT out */
+    }
+    make_fg_step(data, logn - depth + 1, depth - 1, 1, out_ntt); /* last
+                              * step: NTT in, output format per out_ntt */
+}
+
+/*
+ * Solving the NTRU equation, deepest level: compute the resultants of
+ * f and g with X^N+1, and use binary GCD. The F and G values are
+ * returned in tmp[].
+ *
+ * tmp[] is used as five consecutive areas: Fp, Gp, fp, gp (each of
+ * len = MAX_BL_SMALL[logn_top] words), then scratch space t1.
+ *
+ * Returned value: 1 on success, 0 on error.
+ */
+static int
+solve_NTRU_deepest(unsigned logn_top,
+        const int8_t *f, const int8_t *g, uint32_t *tmp) {
+    size_t len;
+    uint32_t *Fp, *Gp, *fp, *gp, *t1, q;
+    const small_prime *primes;
+
+    len = MAX_BL_SMALL[logn_top];
+    primes = PRIMES;
+
+    Fp = tmp;
+    Gp = Fp + len;
+    fp = Gp + len;
+    gp = fp + len;
+    t1 = gp + len;
+
+    make_fg(fp, f, g, logn_top, logn_top, 0);
+
+    /*
+     * We use the CRT to rebuild the resultants as big integers.
+     * There are two such big integers. The resultants are always
+     * nonnegative.
+     */
+    zint_rebuild_CRT(fp, len, len, 2, primes, 0, t1);
+
+    /*
+     * Apply the binary GCD. The zint_bezout() function works only
+     * if both inputs are odd.
+     *
+     * We can test on the result and return 0 because that would
+     * imply failure of the NTRU solving equation, and the (f,g)
+     * values will be abandoned in that case.
+     */
+    if (!zint_bezout(Gp, Fp, fp, gp, len, t1)) {
+        return 0;
+    }
+
+    /*
+     * Multiply the two values by the target value q. Values must
+     * fit in the destination arrays.
+     * We can again test on the returned words: a non-zero output
+     * of zint_mul_small() means that we exceeded our array
+     * capacity, and that implies failure and rejection of (f,g).
+     */
+    q = 12289;
+    if (zint_mul_small(Fp, len, q) != 0
+            || zint_mul_small(Gp, len, q) != 0) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Solving the NTRU equation, intermediate level. Upon entry, the F and G
+ * from the previous level should be in the tmp[] array.
+ * This function MAY be invoked for the top-level (in which case depth = 0).
+ *
+ * On success, tmp[] contains the reduced F then G for this level, at
+ * MAX_BL_SMALL[depth] words per coefficient.
+ *
+ * Returned value: 1 on success, 0 on error.
+ */
+static int
+solve_NTRU_intermediate(unsigned logn_top,
+        const int8_t *f, const int8_t *g, unsigned depth, uint32_t *tmp) {
+    /*
+     * In this function, 'logn' is the log2 of the degree for
+     * this step. If N = 2^logn, then:
+     *  - the F and G values already in tmp[] (from the deeper
+     *    levels) have degree N/2;
+     *  - this function should return F and G of degree N.
+     */
+    unsigned logn;
+    size_t n, hn, slen, dlen, llen, rlen, FGlen, u;
+    uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1;
+    fpr *rt1, *rt2, *rt3, *rt4, *rt5;
+    int scale_fg, minbl_fg, maxbl_fg, maxbl_FG, scale_k;
+    uint32_t *x, *y;
+    int32_t *k;
+    const small_prime *primes;
+
+    logn = logn_top - depth;
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+
+    /*
+     * slen = size for our input f and g; also size of the reduced
+     *        F and G we return (degree N)
+     *
+     * dlen = size of the F and G obtained from the deeper level
+     *        (degree N/2)
+     *
+     * llen = size for intermediary F and G before reduction (degree N)
+     *
+     * We build our non-reduced F and G as two independent halves each,
+     * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1).
+     */
+    slen = MAX_BL_SMALL[depth];
+    dlen = MAX_BL_SMALL[depth + 1];
+    llen = MAX_BL_LARGE[depth];
+    primes = PRIMES;
+
+    /*
+     * Fd and Gd are the F and G from the deeper level.
+     */
+    Fd = tmp;
+    Gd = Fd + dlen * hn;
+
+    /*
+     * Compute the input f and g for this level. Note that we get f
+     * and g in RNS + NTT representation.
+     */
+    ft = Gd + dlen * hn;
+    make_fg(ft, f, g, logn_top, depth, 1);
+
+    /*
+     * Move the newly computed f and g to make room for our candidate
+     * F and G (unreduced).
+     */
+    Ft = tmp;
+    Gt = Ft + n * llen;
+    t1 = Gt + n * llen;
+    memmove(t1, ft, 2 * n * slen * sizeof * ft);
+    ft = t1;
+    gt = ft + slen * n;
+    t1 = gt + slen * n;
+
+    /*
+     * Move Fd and Gd _after_ f and g.
+     */
+    memmove(t1, Fd, 2 * hn * dlen * sizeof * Fd);
+    Fd = t1;
+    Gd = Fd + hn * dlen;
+
+    /*
+     * We reduce Fd and Gd modulo all the small primes we will need,
+     * and store the values in Ft and Gt (only n/2 values in each).
+     */
+    for (u = 0; u < llen; u ++) {
+        uint32_t p, p0i, R2, Rx;
+        size_t v;
+        uint32_t *xs, *ys, *xd, *yd;
+
+        p = primes[u].p;
+        p0i = modp_ninv31(p);
+        R2 = modp_R2(p, p0i);
+        Rx = modp_Rx((unsigned)dlen, p, p0i, R2);
+        for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u;
+                v < hn;
+                v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) {
+            *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx);
+            *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx);
+        }
+    }
+
+    /*
+     * We do not need Fd and Gd after that point.
+     */
+
+    /*
+     * Compute our F and G modulo sufficiently many small primes.
+     */
+    for (u = 0; u < llen; u ++) {
+        uint32_t p, p0i, R2;
+        uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp;
+        size_t v;
+
+        /*
+         * All computations are done modulo p.
+         */
+        p = primes[u].p;
+        p0i = modp_ninv31(p);
+        R2 = modp_R2(p, p0i);
+
+        /*
+         * If we processed slen words, then f and g have been
+         * de-NTTized, and are in RNS; we can rebuild them.
+         */
+        if (u == slen) {
+            zint_rebuild_CRT(ft, slen, slen, n, primes, 1, t1);
+            zint_rebuild_CRT(gt, slen, slen, n, primes, 1, t1);
+        }
+
+        gm = t1;
+        igm = gm + n;
+        fx = igm + n;
+        gx = fx + n;
+
+        modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i);
+
+        if (u < slen) {
+            for (v = 0, x = ft + u, y = gt + u;
+                    v < n; v ++, x += slen, y += slen) {
+                fx[v] = *x;
+                gx[v] = *y;
+            }
+            modp_iNTT2_ext(ft + u, slen, igm, logn, p, p0i);
+            modp_iNTT2_ext(gt + u, slen, igm, logn, p, p0i);
+        } else {
+            uint32_t Rx;
+
+            Rx = modp_Rx((unsigned)slen, p, p0i, R2);
+            for (v = 0, x = ft, y = gt;
+                    v < n; v ++, x += slen, y += slen) {
+                fx[v] = zint_mod_small_signed(x, slen,
+                                              p, p0i, R2, Rx);
+                gx[v] = zint_mod_small_signed(y, slen,
+                                              p, p0i, R2, Rx);
+            }
+            modp_NTT2(fx, gm, logn, p, p0i);
+            modp_NTT2(gx, gm, logn, p, p0i);
+        }
+
+        /*
+         * Get F' and G' modulo p and in NTT representation
+         * (they have degree n/2). These values were computed in
+         * a previous step, and stored in Ft and Gt.
+         */
+        Fp = gx + n;
+        Gp = Fp + hn;
+        for (v = 0, x = Ft + u, y = Gt + u;
+                v < hn; v ++, x += llen, y += llen) {
+            Fp[v] = *x;
+            Gp[v] = *y;
+        }
+        modp_NTT2(Fp, gm, logn - 1, p, p0i);
+        modp_NTT2(Gp, gm, logn - 1, p, p0i);
+
+        /*
+         * Compute our F and G modulo p.
+         *
+         * General case:
+         *
+         *   we divide degree by d = 2 or 3
+         *   f'(x^d) = N(f)(x^d) = f * adj(f)
+         *   g'(x^d) = N(g)(x^d) = g * adj(g)
+         *   f'*G' - g'*F' = q
+         *   F = F'(x^d) * adj(g)
+         *   G = G'(x^d) * adj(f)
+         *
+         * We compute things in the NTT. We group roots of phi
+         * such that all roots x in a group share the same x^d.
+         * If the roots in a group are x_1, x_2... x_d, then:
+         *
+         *   N(f)(x_1^d) = f(x_1)*f(x_2)*...*f(x_d)
+         *
+         * Thus, we have:
+         *
+         *   G(x_1) = f(x_2)*f(x_3)*...*f(x_d)*G'(x_1^d)
+         *   G(x_2) = f(x_1)*f(x_3)*...*f(x_d)*G'(x_1^d)
+         *   ...
+         *   G(x_d) = f(x_1)*f(x_2)*...*f(x_{d-1})*G'(x_1^d)
+         *
+         * In all cases, we can thus compute F and G in NTT
+         * representation by a few simple multiplications.
+         * Moreover, in our chosen NTT representation, roots
+         * from the same group are consecutive in RAM.
+         *
+         * The loop below implements the d = 2 case only: values
+         * are taken in pairs (indices 2*v and 2*v+1), and each
+         * output is the half-degree value multiplied by the
+         * _other_ element of the pair (F uses g, G uses f).
+         */
+        for (v = 0, x = Ft + u, y = Gt + u; v < hn;
+                v ++, x += (llen << 1), y += (llen << 1)) {
+            uint32_t ftA, ftB, gtA, gtB;
+            uint32_t mFp, mGp;
+
+            ftA = fx[(v << 1) + 0];
+            ftB = fx[(v << 1) + 1];
+            gtA = gx[(v << 1) + 0];
+            gtB = gx[(v << 1) + 1];
+            mFp = modp_montymul(Fp[v], R2, p, p0i);
+            mGp = modp_montymul(Gp[v], R2, p, p0i);
+            x[0] = modp_montymul(gtB, mFp, p, p0i);
+            x[llen] = modp_montymul(gtA, mFp, p, p0i);
+            y[0] = modp_montymul(ftB, mGp, p, p0i);
+            y[llen] = modp_montymul(ftA, mGp, p, p0i);
+        }
+        modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i);
+        modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i);
+    }
+
+    /*
+     * Rebuild F and G with the CRT.
+     */
+    zint_rebuild_CRT(Ft, llen, llen, n, primes, 1, t1);
+    zint_rebuild_CRT(Gt, llen, llen, n, primes, 1, t1);
+
+    /*
+     * At that point, Ft, Gt, ft and gt are consecutive in RAM (in that
+     * order).
+     */
+
+    /*
+     * Apply Babai reduction to bring back F and G to size slen.
+     *
+     * We use the FFT to compute successive approximations of the
+     * reduction coefficient. We first isolate the top bits of
+     * the coefficients of f and g, and convert them to floating
+     * point; with the FFT, we compute adj(f), adj(g), and
+     * 1/(f*adj(f)+g*adj(g)).
+     *
+     * Then, we repeatedly apply the following:
+     *
+     *   - Get the top bits of the coefficients of F and G into
+     *     floating point, and use the FFT to compute:
+     *        (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g))
+     *
+     *   - Convert back that value into normal representation, and
+     *     round it to the nearest integers, yielding a polynomial k.
+     *     Proper scaling is applied to f, g, F and G so that the
+     *     coefficients fit on 32 bits (signed).
+     *
+     *   - Subtract k*f from F and k*g from G.
+     *
+     * Under normal conditions, this process reduces the size of F
+     * and G by some bits at each iteration. For constant-time
+     * operation, we do not want to measure the actual length of
+     * F and G; instead, we do the following:
+     *
+     *   - f and g are converted to floating-point, with some scaling
+     *     if necessary to keep values in the representable range.
+     *
+     *   - For each iteration, we _assume_ a maximum size for F and G,
+     *     and use the values at that size. If we overreach, then
+     *     we get zeros, which is harmless: the resulting coefficients
+     *     of k will be 0 and the value won't be reduced.
+     *
+     *   - We conservatively assume that F and G will be reduced by
+     *     at least 25 bits at each iteration.
+     *
+     * Even when reaching the bottom of the reduction, reduction
+     * coefficient will remain low. If it goes out-of-range, then
+     * something wrong occurred and the whole NTRU solving fails.
+     */
+
+    /*
+     * Memory layout:
+     *  - We need to compute and keep adj(f), adj(g), and
+     *    1/(f*adj(f)+g*adj(g)) (sizes N, N and N/2 fp numbers,
+     *    respectively).
+     *  - At each iteration we need two extra fp buffers (N fp values
+     *    each), and produce a k (N 32-bit words). k will be shared
+     *    with one of the fp buffers.
+     *  - To compute k*f and k*g efficiently (with the NTT), we need
+     *    some extra room; we reuse the space of the temporary buffers.
+     *
+     * Arrays of 'fpr' are obtained from the temporary array itself.
+     * We ensure that the base is at a properly aligned offset (the
+     * source array tmp[] is supposed to be already aligned).
+     */
+
+    rt3 = align_fpr(tmp, t1);
+    rt4 = rt3 + n;
+    rt5 = rt4 + n;
+    rt1 = rt5 + (n >> 1);
+    k = (int32_t *)align_u32(tmp, rt1); /* k[] overlays the start of rt1 */
+    rt2 = align_fpr(tmp, k + n); /* rt2 starts after k[] ... */
+    if (rt2 < (rt1 + n)) { /* ... and also after the n values of rt1 */
+        rt2 = rt1 + n;
+    }
+    t1 = (uint32_t *)k + n; /* NTT scratch space starts right after k[] */
+
+    /*
+     * Get f and g into rt3 and rt4 as floating-point approximations.
+     *
+     * We need to "scale down" the floating-point representation of
+     * coefficients when they are too big. We want to keep the value
+     * below 2^310 or so. Thus, when values are larger than 10 words,
+     * we consider only the top 10 words. Array lengths have been
+     * computed so that average maximum length will fall in the
+     * middle or the upper half of these top 10 words.
+     */
+    rlen = slen;
+    if (rlen > 10) {
+        rlen = 10;
+    }
+    poly_big_to_fp(rt3, ft + slen - rlen, rlen, slen, logn);
+    poly_big_to_fp(rt4, gt + slen - rlen, rlen, slen, logn);
+
+    /*
+     * Values in rt3 and rt4 are downscaled by 2^(scale_fg).
+     */
+    scale_fg = 31 * (int)(slen - rlen);
+
+    /*
+     * Estimated boundaries for the maximum size (in bits) of the
+     * coefficients of (f,g). We use the measured average, and
+     * allow for a deviation of at most six times the standard
+     * deviation.
+     */
+    minbl_fg = BITLENGTH[depth].avg - 6 * BITLENGTH[depth].std;
+    maxbl_fg = BITLENGTH[depth].avg + 6 * BITLENGTH[depth].std;
+
+    /*
+     * Compute 1/(f*adj(f)+g*adj(g)) in rt5. We also keep adj(f)
+     * and adj(g) in rt3 and rt4, respectively.
+     */
+    PQCLEAN_FALCON512_AVX2_FFT(rt3, logn);
+    PQCLEAN_FALCON512_AVX2_FFT(rt4, logn);
+    PQCLEAN_FALCON512_AVX2_poly_invnorm2_fft(rt5, rt3, rt4, logn);
+    PQCLEAN_FALCON512_AVX2_poly_adj_fft(rt3, logn);
+    PQCLEAN_FALCON512_AVX2_poly_adj_fft(rt4, logn);
+
+    /*
+     * Reduce F and G repeatedly.
+     *
+     * The expected maximum bit length of coefficients of F and G
+     * is kept in maxbl_FG, with the corresponding word length in
+     * FGlen.
+     */
+    FGlen = llen;
+    maxbl_FG = 31 * (int)llen;
+
+    /*
+     * Each reduction operation computes the reduction polynomial
+     * "k". We need that polynomial to have coefficients that fit
+     * on 32-bit signed integers, with some scaling; thus, we use
+     * a descending sequence of scaling values, down to zero.
+     *
+     * The size of the coefficients of k is (roughly) the difference
+     * between the size of the coefficients of (F,G) and the size
+     * of the coefficients of (f,g). Thus, the maximum size of the
+     * coefficients of k is, at the start, maxbl_FG - minbl_fg;
+     * this is our starting scale value for k.
+     *
+     * We need to estimate the size of (F,G) during the execution of
+     * the algorithm; we are allowed some overestimation but not too
+     * much (poly_big_to_fp() uses a 310-bit window). Generally
+     * speaking, after applying a reduction with k scaled to
+     * scale_k, the size of (F,G) will be size(f,g) + scale_k + dd,
+     * where 'dd' is a few bits to account for the fact that the
+     * reduction is never perfect (intuitively, dd is on the order
+     * of sqrt(N), so at most 5 bits; we here allow for 10 extra
+     * bits).
+     *
+     * The size of (f,g) is not known exactly, but maxbl_fg is an
+     * upper bound.
+     */
+    scale_k = maxbl_FG - minbl_fg;
+
+    for (;;) {
+        int scale_FG, dc, new_maxbl_FG;
+        uint32_t scl, sch;
+        fpr pdc, pt;
+
+        /*
+         * Convert current F and G into floating-point. We apply
+         * scaling if the current length is more than 10 words.
+         */
+        rlen = FGlen;
+        if (rlen > 10) {
+            rlen = 10;
+        }
+        scale_FG = 31 * (int)(FGlen - rlen);
+        poly_big_to_fp(rt1, Ft + FGlen - rlen, rlen, llen, logn);
+        poly_big_to_fp(rt2, Gt + FGlen - rlen, rlen, llen, logn);
+
+        /*
+         * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) in rt2.
+         */
+        PQCLEAN_FALCON512_AVX2_FFT(rt1, logn);
+        PQCLEAN_FALCON512_AVX2_FFT(rt2, logn);
+        PQCLEAN_FALCON512_AVX2_poly_mul_fft(rt1, rt3, logn);
+        PQCLEAN_FALCON512_AVX2_poly_mul_fft(rt2, rt4, logn);
+        PQCLEAN_FALCON512_AVX2_poly_add(rt2, rt1, logn);
+        PQCLEAN_FALCON512_AVX2_poly_mul_autoadj_fft(rt2, rt5, logn);
+        PQCLEAN_FALCON512_AVX2_iFFT(rt2, logn);
+
+        /*
+         * (f,g) are scaled by 'scale_fg', meaning that the
+         * numbers in rt3/rt4 should be multiplied by 2^(scale_fg)
+         * to have their true mathematical value.
+         *
+         * (F,G) are similarly scaled by 'scale_FG'. Therefore,
+         * the value we computed in rt2 is scaled by
+         * 'scale_FG-scale_fg'.
+         *
+         * We want that value to be scaled by 'scale_k', hence we
+         * apply a corrective scaling. After scaling, the values
+         * should fit in -(2^31-1)..+(2^31-1); this is verified
+         * below against fpr_mtwo31m1 and fpr_ptwo31m1 before any
+         * conversion to int32_t.
+         */
+        dc = scale_k - scale_FG + scale_fg;
+
+        /*
+         * We will need to multiply values by 2^(-dc). The value
+         * 'dc' is not secret, so we can compute 2^(-dc) with a
+         * non-constant-time process.
+         * (We could use ldexp(), but we prefer to avoid any
+         * dependency on libm. When using FP emulation, we could
+         * use our fpr_ldexp(), which is constant-time.)
+         *
+         * The loop is a square-and-multiply exponentiation of
+         * 1/2 (or of 2 when dc is negative) to the power |dc|.
+         */
+        if (dc < 0) {
+            dc = -dc;
+            pt = fpr_two;
+        } else {
+            pt = fpr_onehalf;
+        }
+        pdc = fpr_one;
+        while (dc != 0) {
+            if ((dc & 1) != 0) {
+                pdc = fpr_mul(pdc, pt);
+            }
+            dc >>= 1;
+            pt = fpr_sqr(pt);
+        }
+
+        for (u = 0; u < n; u ++) {
+            fpr xv;
+
+            xv = fpr_mul(rt2[u], pdc);
+
+            /*
+             * Sometimes the values can be out-of-bounds if
+             * the algorithm fails; we must not call
+             * fpr_rint() (and cast to int32_t) if the value
+             * is not in-bounds. Note that the test does not
+             * break constant-time discipline, since any
+             * failure here implies that we discard the current
+             * secret key (f,g).
+             */
+            if (!fpr_lt(fpr_mtwo31m1, xv)
+                    || !fpr_lt(xv, fpr_ptwo31m1)) {
+                return 0;
+            }
+            k[u] = (int32_t)fpr_rint(xv);
+        }
+
+        /*
+         * Values in k[] are integers. They really are scaled
+         * down by scale_k bits (scale_k starts at
+         * maxbl_FG - minbl_fg and decreases at each iteration);
+         * the scaling is passed along as sch = scale_k / 31 and
+         * scl = scale_k % 31.
+         *
+         * If we are at low depth, then we use the NTT to
+         * compute k*f and k*g.
+         */
+        sch = (uint32_t)(scale_k / 31);
+        scl = (uint32_t)(scale_k % 31);
+        if (depth <= DEPTH_INT_FG) {
+            poly_sub_scaled_ntt(Ft, FGlen, llen, ft, slen, slen,
+                                k, sch, scl, logn, t1);
+            poly_sub_scaled_ntt(Gt, FGlen, llen, gt, slen, slen,
+                                k, sch, scl, logn, t1);
+        } else {
+            poly_sub_scaled(Ft, FGlen, llen, ft, slen, slen,
+                            k, sch, scl, logn);
+            poly_sub_scaled(Gt, FGlen, llen, gt, slen, slen,
+                            k, sch, scl, logn);
+        }
+
+        /*
+         * We compute the new maximum size of (F,G), assuming that
+         * (f,g) has _maximal_ length (i.e. that reduction is
+         * "late" instead of "early"). We also adjust FGlen
+         * accordingly (by at most one word per iteration).
+         */
+        new_maxbl_FG = scale_k + maxbl_fg + 10;
+        if (new_maxbl_FG < maxbl_FG) {
+            maxbl_FG = new_maxbl_FG;
+            if ((int)FGlen * 31 >= maxbl_FG + 31) {
+                FGlen --;
+            }
+        }
+
+        /*
+         * We suppose that scaling down achieves a reduction by
+         * at least 25 bits per iteration. We stop when we have
+         * done the loop with an unscaled k.
+         */
+        if (scale_k <= 0) {
+            break;
+        }
+        scale_k -= 25;
+        if (scale_k < 0) {
+            scale_k = 0;
+        }
+    }
+
+    /*
+     * If (F,G) length was lowered below 'slen', then we must take
+     * care to re-extend the sign.
+     *
+     * The fill word 'sw' is derived from bit 30 of the top kept
+     * word: it is 0x7FFFFFFF when that bit is set and 0 otherwise
+     * (assuming words hold 31-bit values, as elsewhere in this code).
+     * Note that Ft and Gt are advanced by this loop; the final
+     * compression below starts again from tmp.
+     */
+    if (FGlen < slen) {
+        for (u = 0; u < n; u ++, Ft += llen, Gt += llen) {
+            size_t v;
+            uint32_t sw;
+
+            sw = -(Ft[FGlen - 1] >> 30) >> 1;
+            for (v = FGlen; v < slen; v ++) {
+                Ft[v] = sw;
+            }
+            sw = -(Gt[FGlen - 1] >> 30) >> 1;
+            for (v = FGlen; v < slen; v ++) {
+                Gt[v] = sw;
+            }
+        }
+    }
+
+    /*
+     * Compress encoding of all values to 'slen' words (this is the
+     * expected output format). F and G are contiguous, hence a single
+     * loop over 2*n values, reading with stride llen and writing with
+     * stride slen.
+     */
+    for (u = 0, x = tmp, y = tmp;
+            u < (n << 1); u ++, x += slen, y += llen) {
+        memmove(x, y, slen * sizeof * y);
+    }
+    return 1;
+}
+
+/*
+ * Solving the NTRU equation, binary case, depth = 1. Upon entry, the
+ * F and G from the previous level should be in the tmp[] array.
+ *
+ * Returned value: 1 on success, 0 on error.
+ */
+static int
+solve_NTRU_binary_depth1(unsigned logn_top,
+        const int8_t *f, const int8_t *g, uint32_t *tmp) {
+    /*
+     * The first half of this function is a copy of the corresponding
+     * part in solve_NTRU_intermediate(), for the reconstruction of
+     * the unreduced F and G. The second half (Babai reduction) is
+     * done differently, because the unreduced F and G fit in 53 bits
+     * of precision, allowing a much simpler process with lower RAM
+     * usage.
+     */
+    unsigned depth, logn;
+    size_t n_top, n, hn, slen, dlen, llen, u;
+    uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1;
+    fpr *rt1, *rt2, *rt3, *rt4, *rt5, *rt6;
+    uint32_t *x, *y;
+
+    depth = 1;
+    n_top = (size_t)1 << logn_top;
+    logn = logn_top - depth;
+    n = (size_t)1 << logn;
+    hn = n >> 1;
+
+    /*
+     * Equations are:
+     *
+     *   f' = f0^2 - X^2*f1^2
+     *   g' = g0^2 - X^2*g1^2
+     *   F' and G' are a solution to f'G' - g'F' = q (from deeper levels)
+     *   F = F'*(g0 - X*g1)
+     *   G = G'*(f0 - X*f1)
+     *
+     * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to
+     * degree N/2 (their odd-indexed coefficients are all zero).
+     */
+
+    /*
+     * slen = size for our input f and g; also size of the reduced
+     *        F and G we return (degree N)
+     *
+     * dlen = size of the F and G obtained from the deeper level
+     *        (degree N/2)
+     *
+     * llen = size for intermediary F and G before reduction (degree N)
+     *
+     * We build our non-reduced F and G as two independent halves each,
+     * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1).
+     */
+    slen = MAX_BL_SMALL[depth];
+    dlen = MAX_BL_SMALL[depth + 1];
+    llen = MAX_BL_LARGE[depth];
+
+    /*
+     * Fd and Gd are the F and G from the deeper level.
Ft and Gt + * are the destination arrays for the unreduced F and G. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + Ft = Gd + dlen * hn; + Gt = Ft + llen * n; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * Now Fd and Gd are not needed anymore; we can squeeze them out. + */ + memmove(tmp, Ft, llen * n * sizeof(uint32_t)); + Ft = tmp; + memmove(Ft + llen * n, Gt, llen * n * sizeof(uint32_t)); + Gt = Ft + llen * n; + ft = Gt + llen * n; + gt = ft + slen * n; + + t1 = gt + slen * n; + + /* + * Compute our F and G modulo sufficiently many small primes. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + unsigned e; + size_t v; + + /* + * All computations are done modulo p. + */ + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * We recompute things from the source f and g, of full + * degree. However, we will need only the n first elements + * of the inverse NTT table (igm); the call to modp_mkgm() + * below will fill n_top elements in igm[] (thus overflowing + * into fx[]) but later code will overwrite these extra + * elements. + */ + gm = t1; + igm = gm + n_top; + fx = igm + n; + gx = fx + n_top; + modp_mkgm2(gm, igm, logn_top, PRIMES[u].g, p, p0i); + + /* + * Set ft and gt to f and g modulo p, respectively. + */ + for (v = 0; v < n_top; v ++) { + fx[v] = modp_set(f[v], p); + gx[v] = modp_set(g[v], p); + } + + /* + * Convert to NTT and compute our f and g. 
+ */ + modp_NTT2(fx, gm, logn_top, p, p0i); + modp_NTT2(gx, gm, logn_top, p, p0i); + for (e = logn_top; e > logn; e --) { + modp_poly_rec_res(fx, e, p, p0i, R2); + modp_poly_rec_res(gx, e, p, p0i, R2); + } + + /* + * From that point onward, we only need tables for + * degree n, so we can save some space. + */ + if (depth > 0) { /* always true */ + memmove(gm + n, igm, n * sizeof * igm); + igm = gm + n; + memmove(igm + n, fx, n * sizeof * ft); + fx = igm + n; + memmove(fx + n, gx, n * sizeof * gt); + gx = fx + n; + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed + * in a previous step, and stored in Ft and Gt. + */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * Equations are: + * + * f'(x^2) = N(f)(x^2) = f * adj(f) + * g'(x^2) = N(g)(x^2) = g * adj(g) + * + * f'*G' - g'*F' = q + * + * F = F'(x^2) * adj(g) + * G = G'(x^2) * adj(f) + * + * The NTT representation of f is f(w) for all w which + * are roots of phi. In the binary case, as well as in + * the ternary case for all depth except the deepest, + * these roots can be grouped in pairs (w,-w), and we + * then have: + * + * f(w) = adj(f)(-w) + * f(-w) = adj(f)(w) + * + * and w^2 is then a root for phi at the half-degree. + * + * At the deepest level in the ternary case, this still + * holds, in the following sense: the roots of x^2-x+1 + * are (w,-w^2) (for w^3 = -1, and w != -1), and we + * have: + * + * f(w) = adj(f)(-w^2) + * f(-w^2) = adj(f)(w) + * + * In all case, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, the two roots for each pair are consecutive + * in our bit-reversal encoding. 
+ */ + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + + /* + * Also save ft and gt (only up to size slen). + */ + if (u < slen) { + modp_iNTT2(fx, igm, logn, p, p0i); + modp_iNTT2(gx, igm, logn, p, p0i); + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + *x = fx[v]; + *y = gx[v]; + } + } + } + + /* + * Rebuild f, g, F and G with the CRT. Note that the elements of F + * and G are consecutive, and thus can be rebuilt in a single + * loop; similarly, the elements of f and g are consecutive. + */ + zint_rebuild_CRT(Ft, llen, llen, n << 1, PRIMES, 1, t1); + zint_rebuild_CRT(ft, slen, slen, n << 1, PRIMES, 1, t1); + + /* + * Here starts the Babai reduction, specialized for depth = 1. + * + * Candidates F and G (from Ft and Gt), and base f and g (ft and gt), + * are converted to floating point. There is no scaling, and a + * single pass is sufficient. + */ + + /* + * Convert F and G into floating point (rt1 and rt2). + */ + rt1 = align_fpr(tmp, gt + slen * n); + rt2 = rt1 + n; + poly_big_to_fp(rt1, Ft, llen, llen, logn); + poly_big_to_fp(rt2, Gt, llen, llen, logn); + + /* + * Integer representation of F and G is no longer needed, we + * can remove it. 
+ */ + memmove(tmp, ft, 2 * slen * n * sizeof * ft); + ft = tmp; + gt = ft + slen * n; + rt3 = align_fpr(tmp, gt + slen * n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * Convert f and g into floating point (rt3 and rt4). + */ + poly_big_to_fp(rt3, ft, slen, slen, logn); + poly_big_to_fp(rt4, gt, slen, slen, logn); + + /* + * Remove unneeded ft and gt. + */ + memmove(tmp, rt1, 4 * n * sizeof * rt1); + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * We now have: + * rt1 = F + * rt2 = G + * rt3 = f + * rt4 = g + * in that order in RAM. We convert all of them to FFT. + */ + PQCLEAN_FALCON512_AVX2_FFT(rt1, logn); + PQCLEAN_FALCON512_AVX2_FFT(rt2, logn); + PQCLEAN_FALCON512_AVX2_FFT(rt3, logn); + PQCLEAN_FALCON512_AVX2_FFT(rt4, logn); + + /* + * Compute: + * rt5 = F*adj(f) + G*adj(g) + * rt6 = 1 / (f*adj(f) + g*adj(g)) + * (Note that rt6 is half-length.) + */ + rt5 = rt4 + n; + rt6 = rt5 + n; + PQCLEAN_FALCON512_AVX2_poly_add_muladj_fft(rt5, rt1, rt2, rt3, rt4, logn); + PQCLEAN_FALCON512_AVX2_poly_invnorm2_fft(rt6, rt3, rt4, logn); + + /* + * Compute: + * rt5 = (F*adj(f)+G*adj(g)) / (f*adj(f)+g*adj(g)) + */ + PQCLEAN_FALCON512_AVX2_poly_mul_autoadj_fft(rt5, rt6, logn); + + /* + * Compute k as the rounded version of rt5. Check that none of + * the values is larger than 2^63-1 (in absolute value) + * because that would make the fpr_rint() do something undefined; + * note that any out-of-bounds value here implies a failure and + * (f,g) will be discarded, so we can make a simple test. + */ + PQCLEAN_FALCON512_AVX2_iFFT(rt5, logn); + for (u = 0; u < n; u ++) { + fpr z; + + z = rt5[u]; + if (!fpr_lt(z, fpr_ptwo63m1) || !fpr_lt(fpr_mtwo63m1, z)) { + return 0; + } + rt5[u] = fpr_of(fpr_rint(z)); + } + PQCLEAN_FALCON512_AVX2_FFT(rt5, logn); + + /* + * Subtract k*f from F, and k*g from G. 
+ */ + PQCLEAN_FALCON512_AVX2_poly_mul_fft(rt3, rt5, logn); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(rt4, rt5, logn); + PQCLEAN_FALCON512_AVX2_poly_sub(rt1, rt3, logn); + PQCLEAN_FALCON512_AVX2_poly_sub(rt2, rt4, logn); + PQCLEAN_FALCON512_AVX2_iFFT(rt1, logn); + PQCLEAN_FALCON512_AVX2_iFFT(rt2, logn); + + /* + * Convert back F and G to integers (one word per coefficient, F then G, at the start of tmp[]), and return. + */ + Ft = tmp; + Gt = Ft + n; + rt3 = align_fpr(tmp, Gt + n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + for (u = 0; u < n; u ++) { + Ft[u] = (uint32_t)fpr_rint(rt1[u]); + Gt[u] = (uint32_t)fpr_rint(rt2[u]); + } + + return 1; +} + +/* + * Solving the NTRU equation, top level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error (this function has no failure path and always returns 1). + */ +static int +solve_NTRU_binary_depth0(unsigned logn, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t n, hn, u; + uint32_t p, p0i, R2; + uint32_t *Fp, *Gp, *t1, *t2, *t3, *t4, *t5; + uint32_t *gm, *igm, *ft, *gt; + fpr *rt2, *rt3; + + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + * + * Everything should fit in 31-bit integers, hence we can just use + * the first small prime p = 2147473409. + */ + p = PRIMES[0].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + Fp = tmp; + Gp = Fp + hn; + ft = Gp + hn; + gt = ft + n; + gm = gt + n; + igm = gm + n; + + modp_mkgm2(gm, igm, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F' and G' to NTT representation.
+ */ + for (u = 0; u < hn; u ++) { + Fp[u] = modp_set(zint_one_to_plain(Fp + u), p); + Gp[u] = modp_set(zint_one_to_plain(Gp + u), p); + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Load f and g and convert them to NTT representation. + */ + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + + /* + * Build the unreduced F and G in ft and gt (still in NTT representation until the inverse NTT calls below). + */ + for (u = 0; u < n; u += 2) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = ft[u + 0]; + ftB = ft[u + 1]; + gtA = gt[u + 0]; + gtB = gt[u + 1]; + mFp = modp_montymul(Fp[u >> 1], R2, p, p0i); + mGp = modp_montymul(Gp[u >> 1], R2, p, p0i); + ft[u + 0] = modp_montymul(gtB, mFp, p, p0i); + ft[u + 1] = modp_montymul(gtA, mFp, p, p0i); + gt[u + 0] = modp_montymul(ftB, mGp, p, p0i); + gt[u + 1] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2(ft, igm, logn, p, p0i); + modp_iNTT2(gt, igm, logn, p, p0i); + + Gp = Fp + n; + t1 = Gp + n; + memmove(Fp, ft, 2 * n * sizeof * ft); + + /* + * We now need to apply the Babai reduction. At that point, + * we have F and G in two n-word arrays. + * + * We can compute F*adj(f)+G*adj(g) and f*adj(f)+g*adj(g) + * modulo p, using the NTT. We still move memory around in + * order to save RAM. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + + /* + * Compute the NTT tables in t1 and t2. We do not keep t2 + * (we'll recompute it later on). + */ + modp_mkgm2(t1, t2, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F and G to NTT. + */ + modp_NTT2(Fp, t1, logn, p, p0i); + modp_NTT2(Gp, t1, logn, p, p0i); + + /* + * Load f and adj(f) in t4 and t5, and convert them to NTT + * representation.
+ */ + t4[0] = t5[0] = modp_set(f[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[n - u] = modp_set(-f[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Compute F*adj(f) in t2, and f*adj(f) in t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_montymul(w, Fp[u], p, p0i); + t3[u] = modp_montymul(w, t4[u], p, p0i); + } + + /* + * Load g and adj(g) in t4 and t5, and convert them to NTT + * representation. + */ + t4[0] = t5[0] = modp_set(g[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(g[u], p); + t5[n - u] = modp_set(-g[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Add G*adj(g) to t2, and g*adj(g) to t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_add(t2[u], + modp_montymul(w, Gp[u], p, p0i), p); + t3[u] = modp_add(t3[u], + modp_montymul(w, t4[u], p, p0i), p); + } + + /* + * Convert back t2 and t3 to normal representation (normalized + * around 0), and then + * move them to t1 and t2. We first need to recompute the + * inverse table for NTT. + */ + modp_mkgm2(t1, t4, logn, PRIMES[0].g, p, p0i); + modp_iNTT2(t2, t4, logn, p, p0i); + modp_iNTT2(t3, t4, logn, p, p0i); + for (u = 0; u < n; u ++) { + t1[u] = (uint32_t)modp_norm(t2[u], p); + t2[u] = (uint32_t)modp_norm(t3[u], p); + } + + /* + * At that point, array contents are: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * F*adj(f)+G*adj(g) (t1) + * f*adj(f)+g*adj(g) (t2) + * + * We want to divide t1 by t2. The result is not integral; it + * must be rounded. We thus need to use the FFT. + */ + + /* + * Get f*adj(f)+g*adj(g) in FFT representation. Since this + * polynomial is auto-adjoint, all its coordinates in FFT + * representation are actually real, so we can truncate off + * the imaginary parts. 
+ */ + rt3 = align_fpr(tmp, t3); + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t2)[u]); + } + PQCLEAN_FALCON512_AVX2_FFT(rt3, logn); + rt2 = align_fpr(tmp, t2); + memmove(rt2, rt3, hn * sizeof * rt3); + + /* + * Convert F*adj(f)+G*adj(g) in FFT representation. + */ + rt3 = rt2 + hn; + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t1)[u]); + } + PQCLEAN_FALCON512_AVX2_FFT(rt3, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) and get + * its rounded normal representation in t1. + */ + PQCLEAN_FALCON512_AVX2_poly_div_autoadj_fft(rt3, rt2, logn); + PQCLEAN_FALCON512_AVX2_iFFT(rt3, logn); + for (u = 0; u < n; u ++) { + t1[u] = modp_set((int32_t)fpr_rint(rt3[u]), p); + } + + /* + * RAM contents are now: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * k (t1) + * + * We want to compute F-k*f, and G-k*g. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + modp_mkgm2(t2, t3, logn, PRIMES[0].g, p, p0i); + for (u = 0; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[u] = modp_set(g[u], p); + } + modp_NTT2(t1, t2, logn, p, p0i); + modp_NTT2(t4, t2, logn, p, p0i); + modp_NTT2(t5, t2, logn, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t kw; + + kw = modp_montymul(t1[u], R2, p, p0i); + Fp[u] = modp_sub(Fp[u], + modp_montymul(kw, t4[u], p, p0i), p); + Gp[u] = modp_sub(Gp[u], + modp_montymul(kw, t5[u], p, p0i), p); + } + modp_iNTT2(Fp, t3, logn, p, p0i); + modp_iNTT2(Gp, t3, logn, p, p0i); + for (u = 0; u < n; u ++) { + Fp[u] = (uint32_t)modp_norm(Fp[u], p); + Gp[u] = (uint32_t)modp_norm(Gp[u], p); + } + + return 1; +} + +/* + * Solve the NTRU equation. Returned value is 1 on success, 0 on error. + * G can be NULL, in which case that value is computed but not returned. + * If any of the coefficients of F and G exceeds lim (in absolute value), + * then 0 is returned. 
+ */ +static int +solve_NTRU(unsigned logn, int8_t *F, int8_t *G, + const int8_t *f, const int8_t *g, int lim, uint32_t *tmp) { + size_t n, u; + uint32_t *ft, *gt, *Ft, *Gt, *gm; + uint32_t p, p0i, r; + const small_prime *primes; + + n = MKN(logn); + + if (!solve_NTRU_deepest(logn, f, g, tmp)) { + return 0; + } + + /* + * For logn <= 2, we need to use solve_NTRU_intermediate() + * directly, because coefficients are a bit too large and + * do not fit the hypotheses in solve_NTRU_binary_depth0(). + */ + if (logn <= 2) { + unsigned depth; + + depth = logn; + while (depth -- > 0) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + } else { + unsigned depth; + + depth = logn; + while (depth -- > 2) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + if (!solve_NTRU_binary_depth1(logn, f, g, tmp)) { + return 0; + } + if (!solve_NTRU_binary_depth0(logn, f, g, tmp)) { + return 0; + } + } + + /* + * If no buffer has been provided for G, use a temporary one (taken from tmp[], past the 2*n words that hold F and G). + */ + if (G == NULL) { + G = (int8_t *)(tmp + 2 * n); + } + + /* + * Final F and G are in tmp[] (F first, then G), one word per coefficient + * (signed value over 31 bits). + */ + if (!poly_big_to_small(F, tmp, lim, logn) + || !poly_big_to_small(G, tmp + n, lim, logn)) { + return 0; + } + + /* + * Verify that the NTRU equation is fulfilled. Since all elements + * have short lengths, verifying modulo a small prime p works, and + * allows using the NTT. + * + * We put Gt[] first in tmp[], and process it first, so that it does + * not overlap with G[] in case we allocated it ourselves.
+ */ + Gt = tmp; + ft = Gt + n; + gt = ft + n; + Ft = gt + n; + gm = Ft + n; + + primes = PRIMES; + p = primes[0].p; + p0i = modp_ninv31(p); + modp_mkgm2(gm, tmp, logn, primes[0].g, p, p0i); + for (u = 0; u < n; u ++) { + Gt[u] = modp_set(G[u], p); + } + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + Ft[u] = modp_set(F[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + modp_NTT2(Ft, gm, logn, p, p0i); + modp_NTT2(Gt, gm, logn, p, p0i); + r = modp_montymul(12289, 1, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t z; + + z = modp_sub(modp_montymul(ft[u], Gt[u], p, p0i), + modp_montymul(gt[u], Ft[u], p, p0i), p); + if (z != r) { + return 0; + } + } + + return 1; +} + +/* + * Generate a random polynomial with a Gaussian distribution. This function + * also makes sure that the resultant of the polynomial with phi is odd. + */ +static void +poly_small_mkgauss(RNG_CONTEXT *rng, int8_t *f, unsigned logn) { + size_t n, u; + unsigned mod2; + + n = MKN(logn); + mod2 = 0; + for (u = 0; u < n; u ++) { + int s; + +restart: + s = mkgauss(rng, logn); + + /* + * We need the coefficient to fit within -127..+127; + * realistically, this is always the case except for + * the very low degrees (N = 2 or 4), for which there + * is no real security anyway. + */ + if (s < -127 || s > 127) { + goto restart; + } + + /* + * We need the sum of all coefficients to be odd (mod2 tracks its parity; the last coefficient is redrawn until it fits); otherwise, + * the resultant of the polynomial with X^N+1 will be even, + * and the binary GCD will fail. + */ + if (u == n - 1) { + if ((mod2 ^ (unsigned)(s & 1)) == 0) { + goto restart; + } + } else { + mod2 ^= (unsigned)(s & 1); + } + f[u] = (int8_t)s; + } +} + +/* see falcon.h */ +void +PQCLEAN_FALCON512_AVX2_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp) { + /* + * Algorithm is the following: + * + * - Generate f and g with the Gaussian distribution.
+ * + * - If either Res(f,phi) or Res(g,phi) is even, try again. + * + * - If ||(f,g)|| is too large, try again. + * + * - If ||B~_{f,g}|| is too large, try again. + * + * - If f is not invertible mod phi mod q, try again. + * + * - Compute h = g/f mod phi mod q. + * + * - Solve the NTRU equation fG - gF = q; if the solving fails, + * try again. Usual failure condition is when Res(f,phi) + * and Res(g,phi) are not prime to each other. + */ + size_t n, u; + uint16_t *h2, *tmp2; + RNG_CONTEXT *rc; + + n = MKN(logn); + rc = rng; + + /* + * We need to generate f and g randomly, until we find values + * such that the norm of (g,-f), and of the orthogonalized + * vector, are satisfying. The orthogonalized vector is: + * (q*adj(f)/(f*adj(f)+g*adj(g)), q*adj(g)/(f*adj(f)+g*adj(g))) + * (it is actually the (N+1)-th row of the Gram-Schmidt basis). + * + * In the binary case, coefficients of f and g are generated + * independently of each other, with a discrete Gaussian + * distribution of standard deviation 1.17*sqrt(q/(2*N)). Then, + * the two vectors have expected norm 1.17*sqrt(q), which is + * also our acceptance bound: we require both vectors to be no + * larger than that (this will be satisfied about 1/4th of the + * time, thus we expect sampling new (f,g) about 4 times for that + * step). + * + * We require that Res(f,phi) and Res(g,phi) are both odd (the + * NTRU equation solver requires it). + */ + for (;;) { + fpr *rt1, *rt2, *rt3; + fpr bnorm; + uint32_t normf, normg, norm; + int lim; + + /* + * The poly_small_mkgauss() function makes sure + * that the sum of coefficients is 1 modulo 2 + * (i.e. the resultant of the polynomial with phi + * will be odd). + */ + poly_small_mkgauss(rc, f, logn); + poly_small_mkgauss(rc, g, logn); + + /* + * Verify that all coefficients are within the bounds + * defined in max_fg_bits. This is the case with + * overwhelming probability; this guarantees that the + * key will be encodable with FALCON_COMP_TRIM. 
+ */ + lim = 1 << (PQCLEAN_FALCON512_AVX2_max_fg_bits[logn] - 1); + for (u = 0; u < n; u ++) { + /* + * We can use non-CT tests since on any failure + * we will discard f and g. + */ + if (f[u] >= lim || f[u] <= -lim + || g[u] >= lim || g[u] <= -lim) { + lim = -1; + break; + } + } + if (lim < 0) { + continue; + } + + /* + * Bound is 1.17*sqrt(q). We compute the squared + * norms. With q = 12289, the squared bound is: + * (1.17^2)* 12289 = 16822.4121 + * Since f and g are integral, the squared norm + * of (g,-f) is an integer. + */ + normf = poly_small_sqnorm(f, logn); + normg = poly_small_sqnorm(g, logn); + norm = (normf + normg) | -((normf | normg) >> 31); + if (norm >= 16823) { + continue; + } + + /* + * We compute the orthogonalized vector norm. + */ + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + poly_small_to_fp(rt1, f, logn); + poly_small_to_fp(rt2, g, logn); + PQCLEAN_FALCON512_AVX2_FFT(rt1, logn); + PQCLEAN_FALCON512_AVX2_FFT(rt2, logn); + PQCLEAN_FALCON512_AVX2_poly_invnorm2_fft(rt3, rt1, rt2, logn); + PQCLEAN_FALCON512_AVX2_poly_adj_fft(rt1, logn); + PQCLEAN_FALCON512_AVX2_poly_adj_fft(rt2, logn); + PQCLEAN_FALCON512_AVX2_poly_mulconst(rt1, fpr_q, logn); + PQCLEAN_FALCON512_AVX2_poly_mulconst(rt2, fpr_q, logn); + PQCLEAN_FALCON512_AVX2_poly_mul_autoadj_fft(rt1, rt3, logn); + PQCLEAN_FALCON512_AVX2_poly_mul_autoadj_fft(rt2, rt3, logn); + PQCLEAN_FALCON512_AVX2_iFFT(rt1, logn); + PQCLEAN_FALCON512_AVX2_iFFT(rt2, logn); + bnorm = fpr_zero; + for (u = 0; u < n; u ++) { + bnorm = fpr_add(bnorm, fpr_sqr(rt1[u])); + bnorm = fpr_add(bnorm, fpr_sqr(rt2[u])); + } + if (!fpr_lt(bnorm, fpr_bnorm_max)) { + continue; + } + + /* + * Compute public key h = g/f mod X^N+1 mod q. If this + * fails, we must restart. 
+ */ + if (h == NULL) { + h2 = (uint16_t *)tmp; + tmp2 = h2 + n; + } else { + h2 = h; + tmp2 = (uint16_t *)tmp; + } + if (!PQCLEAN_FALCON512_AVX2_compute_public(h2, f, g, logn, (uint8_t *)tmp2)) { + continue; + } + + /* + * Solve the NTRU equation to get F and G; lim is the largest absolute value allowed for their coefficients. + */ + lim = (1 << (PQCLEAN_FALCON512_AVX2_max_FG_bits[logn] - 1)) - 1; + if (!solve_NTRU(logn, F, G, f, g, lim, (uint32_t *)tmp)) { + continue; + } + + /* + * Key pair is generated. + */ + break; + } +} diff --git a/crypto_sign/falcon/falcon-512/avx2/pqclean.c b/crypto_sign/falcon/falcon-512/avx2/pqclean.c new file mode 100644 index 00000000..7777acbc --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/pqclean.c @@ -0,0 +1,384 @@ +#include "api.h" +#include "inner.h" +#include "randombytes.h" +#include +#include +/* + * Wrapper for implementing the PQClean API. + */ + + + +#define NONCELEN 40 +#define SEEDLEN 48 + +/* + * Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) + * + * private key: + * header byte: 0101nnnn + * private f (6 or 5 bits by element, depending on degree) + * private g (6 or 5 bits by element, depending on degree) + * private F (8 bits by element) + * + * public key: + * header byte: 0000nnnn + * public h (14 bits by element) + * + * signature: + * header byte: 0011nnnn + * nonce 40 bytes + * value (12 bits by element) + * + * message + signature: + * signature length (2 bytes, big-endian) + * nonce 40 bytes + * message + * header byte: 0010nnnn + * value (12 bits by element) + * (signature length is 1+len(value), not counting the nonce) + */ + +/* see api.h; returns 0 on success, -1 if an encoding step fails or the encoded private key does not have the expected length */ +int +PQCLEAN_FALCON512_AVX2_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + union { + uint8_t b[FALCON_KEYGEN_TEMP_9]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[512], g[512], F[512]; + uint16_t h[512]; + unsigned char seed[SEEDLEN]; + inner_shake256_context rng; + size_t u, v; + + /* + * Generate key pair.
+ */ + randombytes(seed, sizeof seed); + inner_shake256_init(&rng); + inner_shake256_inject(&rng, seed, sizeof seed); + inner_shake256_flip(&rng); + PQCLEAN_FALCON512_AVX2_keygen(&rng, f, g, F, NULL, h, 9, tmp.b); + inner_shake256_ctx_release(&rng); + + /* + * Encode private key. + */ + sk[0] = 0x50 + 9; + u = 1; + v = PQCLEAN_FALCON512_AVX2_trim_i8_encode( + sk + u, PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES - u, + f, 9, PQCLEAN_FALCON512_AVX2_max_fg_bits[9]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_AVX2_trim_i8_encode( + sk + u, PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES - u, + g, 9, PQCLEAN_FALCON512_AVX2_max_fg_bits[9]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_AVX2_trim_i8_encode( + sk + u, PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES - u, + F, 9, PQCLEAN_FALCON512_AVX2_max_FG_bits[9]); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES) { + return -1; + } + + /* + * Encode public key. + */ + pk[0] = 0x00 + 9; + v = PQCLEAN_FALCON512_AVX2_modq_encode( + pk + 1, PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES - 1, + h, 9); + if (v != PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + + return 0; +} + +/* + * Compute the signature. nonce[] receives the nonce and must have length + * NONCELEN bytes. sigbuf[] receives the signature value (without nonce + * or header byte), with *sigbuflen providing the maximum value length and + * receiving the actual value length. + * + * If a signature could be computed but not encoded because it would + * exceed the output buffer size, then a new signature is computed. If + * the provided buffer size is too low, this could loop indefinitely, so + * the caller must provide a size that can accommodate signatures with a + * large enough probability. + * + * Return value: 0 on success, -1 on error. 
+ */ +static int +do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + union { + uint8_t b[72 * 512]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[512], g[512], F[512], G[512]; + union { + int16_t sig[512]; + uint16_t hm[512]; + } r; + unsigned char seed[SEEDLEN]; + inner_shake256_context sc; + size_t u, v; + + /* + * Decode the private key (header byte, then f, g and F, which must fill the key exactly); G is then recomputed from them. + */ + if (sk[0] != 0x50 + 9) { + return -1; + } + u = 1; + v = PQCLEAN_FALCON512_AVX2_trim_i8_decode( + f, 9, PQCLEAN_FALCON512_AVX2_max_fg_bits[9], + sk + u, PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_AVX2_trim_i8_decode( + g, 9, PQCLEAN_FALCON512_AVX2_max_fg_bits[9], + sk + u, PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_AVX2_trim_i8_decode( + F, 9, PQCLEAN_FALCON512_AVX2_max_FG_bits[9], + sk + u, PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON512_AVX2_CRYPTO_SECRETKEYBYTES) { + return -1; + } + if (!PQCLEAN_FALCON512_AVX2_complete_private(G, f, g, F, 9, tmp.b)) { + return -1; + } + + /* + * Create a random nonce (NONCELEN = 40 bytes). + */ + randombytes(nonce, NONCELEN); + + /* + * Hash nonce + message into a vector (r.hm). + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON512_AVX2_hash_to_point_ct(&sc, r.hm, 9, tmp.b); + inner_shake256_ctx_release(&sc); + + /* + * Initialize a RNG: the sc context is re-initialized and fed SEEDLEN fresh bytes from randombytes(). + */ + randombytes(seed, sizeof seed); + inner_shake256_init(&sc); + inner_shake256_inject(&sc, seed, sizeof seed); + inner_shake256_flip(&sc); + + /* + * Compute and return the signature. This loops until a signature + * value is found that fits in the provided buffer.
+ */ + for (;;) { + PQCLEAN_FALCON512_AVX2_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 9, tmp.b); + v = PQCLEAN_FALCON512_AVX2_comp_encode(sigbuf, *sigbuflen, r.sig, 9); + if (v != 0) { + inner_shake256_ctx_release(&sc); + *sigbuflen = v; + return 0; + } + } +} + +/* + * Verify a signature. The nonce has size NONCELEN bytes. sigbuf[] + * (of size sigbuflen) contains the signature value, not including the + * header byte or nonce. Return value is 0 on success, -1 on error. + */ +static int +do_verify( + const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + union { + uint8_t b[2 * 512]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + uint16_t h[512], hm[512]; + int16_t sig[512]; + inner_shake256_context sc; + + /* + * Decode public key (header byte, then h, which must fill the key exactly). + */ + if (pk[0] != 0x00 + 9) { + return -1; + } + if (PQCLEAN_FALCON512_AVX2_modq_decode(h, 9, + pk + 1, PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES - 1) + != PQCLEAN_FALCON512_AVX2_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + PQCLEAN_FALCON512_AVX2_to_ntt_monty(h, 9); + + /* + * Decode signature; an empty value is rejected and the decoder must consume exactly sigbuflen bytes. + */ + if (sigbuflen == 0) { + return -1; + } + if (PQCLEAN_FALCON512_AVX2_comp_decode(sig, 9, sigbuf, sigbuflen) != sigbuflen) { + return -1; + } + + /* + * Hash nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON512_AVX2_hash_to_point_ct(&sc, hm, 9, tmp.b); + inner_shake256_ctx_release(&sc); + + /* + * Verify signature.
+ */ + if (!PQCLEAN_FALCON512_AVX2_verify_raw(hm, sig, h, 9, tmp.b)) { + return -1; + } + return 0; +} + +/* see api.h; writes header byte (0x30 + 9), nonce (NONCELEN bytes), then the signature value into sig[]; returns 0 on success, -1 on error */ +int +PQCLEAN_FALCON512_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + /* + * The PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES constant is used for + * the signed message object (as produced by PQCLEAN_FALCON512_AVX2_crypto_sign()) + * and includes a two-byte length value, so we take care here + * to only generate signatures that are two bytes shorter than + * the maximum. This is done to ensure that PQCLEAN_FALCON512_AVX2_crypto_sign() + * and PQCLEAN_FALCON512_AVX2_crypto_sign_signature() produce the exact same signature + * value, if used on the same message, with the same private key, + * and using the same output from randombytes() (this is for + * reproducibility of tests). + */ + size_t vlen; + + vlen = PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { + return -1; + } + sig[0] = 0x30 + 9; + *siglen = 1 + NONCELEN + vlen; + return 0; +} + +/* see api.h; checks the minimum length and the header byte, then delegates to do_verify(); returns 0 if the signature is accepted, -1 otherwise */ +int +PQCLEAN_FALCON512_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen < 1 + NONCELEN) { + return -1; + } + if (sig[0] != 0x30 + 9) { + return -1; + } + return do_verify(sig + 1, + sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); +} + +/* see api.h; writes 2-byte big-endian length, nonce, message, header byte (0x20 + 9), then the signature value into sm[]; returns 0 on success, -1 on error */ +int +PQCLEAN_FALCON512_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + uint8_t *pm, *sigbuf; + size_t sigbuflen; + + /* + * Move the message to its final location; this is a memmove() so + * it handles overlaps properly.
+ */ + memmove(sm + 2 + NONCELEN, m, mlen); + pm = sm + 2 + NONCELEN; + sigbuf = pm + 1 + mlen; + sigbuflen = PQCLEAN_FALCON512_AVX2_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { + return -1; + } + pm[mlen] = 0x20 + 9; + sigbuflen ++; + sm[0] = (uint8_t)(sigbuflen >> 8); + sm[1] = (uint8_t)sigbuflen; + *smlen = mlen + 2 + NONCELEN + sigbuflen; + return 0; +} + +/* see api.h; on success (return 0) the message is copied to m[] and its length stored in *mlen; returns -1 on a malformed or invalid signed message, in which case m[] and *mlen are not written */ +int +PQCLEAN_FALCON512_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + const uint8_t *sigbuf; + size_t pmlen, sigbuflen; + + if (smlen < 3 + NONCELEN) { + return -1; + } + sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; + if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { + return -1; + } + sigbuflen --; + pmlen = smlen - NONCELEN - 3 - sigbuflen; + if (sm[2 + NONCELEN + pmlen] != 0x20 + 9) { + return -1; + } + sigbuf = sm + 2 + NONCELEN + pmlen + 1; + + /* + * The 2-byte length header and the one-byte signature header + * have been verified. Nonce is at sm+2, followed by the message + * itself. Message length is in pmlen. sigbuf/sigbuflen point to + * the signature value (excluding the header byte). + */ + if (do_verify(sm + 2, sigbuf, sigbuflen, + sm + 2 + NONCELEN, pmlen, pk) < 0) { + return -1; + } + + /* + * Signature is correct, we just have to copy/move the message + * to its final destination. The memmove() properly handles + * overlaps. + */ + memmove(m, sm + 2 + NONCELEN, pmlen); + *mlen = pmlen; + return 0; +} diff --git a/crypto_sign/falcon/falcon-512/avx2/rng.c b/crypto_sign/falcon/falcon-512/avx2/rng.c new file mode 100644 index 00000000..68c6dc7e --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/rng.c @@ -0,0 +1,195 @@ +#include "inner.h" +#include +/* + * PRNG and interface to the system RNG.
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + + +/* + * Include relevant system header files. For Win32, this will also need + * linking with advapi32.dll, which we trigger with an appropriate #pragma. NOTE(review): no such include or #pragma follows this comment here. + */ + +/* see inner.h; stub that never writes to seed[]: returns 1 only when len == 0, and 0 for any other length */ +int +PQCLEAN_FALCON512_AVX2_get_seed(void *seed, size_t len) { + (void)seed; + if (len == 0) { + return 1; + } + return 0; +} + +/* see inner.h; extracts 56 bytes from src into p->state.d, then performs a first refill */ +void +PQCLEAN_FALCON512_AVX2_prng_init(prng *p, inner_shake256_context *src) { + inner_shake256_extract(src, p->state.d, 56); + PQCLEAN_FALCON512_AVX2_prng_refill(p); +} + +/* + * PRNG based on ChaCha20. + * + * State consists in key (32 bytes) then IV (16 bytes) and block counter + * (8 bytes).
Normally, we should not care about local endianness (this + * is for a PRNG), but for the NIST competition we need reproducible KAT + * vectors that work across architectures, so we enforce little-endian + * interpretation where applicable. Moreover, output words are "spread + * out" over the output buffer with the interleaving pattern that is + * naturally obtained from the AVX2 implementation that runs eight + * ChaCha20 instances in parallel. + * + * The block counter is XORed into the first 8 bytes of the IV. + */ +void +PQCLEAN_FALCON512_AVX2_prng_refill(prng *p) { + + static const uint32_t CW[] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + }; + + uint64_t cc; + size_t u; + int i; + uint32_t *sw; + union { + uint32_t w[16]; + __m256i y[2]; /* for alignment */ + } t; + __m256i state[16], init[16]; + + sw = (uint32_t *)p->state.d; + + /* + * XOR next counter values into state. + */ + cc = *(uint64_t *)(p->state.d + 48); + for (u = 0; u < 8; u ++) { + t.w[u] = (uint32_t)(cc + u); + t.w[u + 8] = (uint32_t)((cc + u) >> 32); + } + *(uint64_t *)(p->state.d + 48) = cc + 8; + + /* + * Load state. + */ + for (u = 0; u < 4; u ++) { + state[u] = init[u] = + _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)CW[u])); + } + for (u = 0; u < 10; u ++) { + state[u + 4] = init[u + 4] = + _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[u])); + } + state[14] = init[14] = _mm256_xor_si256( + _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[10])), + _mm256_loadu_si256((__m256i *)&t.w[0])); + state[15] = init[15] = _mm256_xor_si256( + _mm256_broadcastd_epi32(_mm_cvtsi32_si128((int32_t)sw[11])), + _mm256_loadu_si256((__m256i *)&t.w[8])); + + /* + * Do all rounds. 
+ */ + for (i = 0; i < 10; i ++) { + +#define QROUND(a, b, c, d) do { \ + state[a] = _mm256_add_epi32(state[a], state[b]); \ + state[d] = _mm256_xor_si256(state[d], state[a]); \ + state[d] = _mm256_or_si256( \ + _mm256_slli_epi32(state[d], 16), \ + _mm256_srli_epi32(state[d], 16)); \ + state[c] = _mm256_add_epi32(state[c], state[d]); \ + state[b] = _mm256_xor_si256(state[b], state[c]); \ + state[b] = _mm256_or_si256( \ + _mm256_slli_epi32(state[b], 12), \ + _mm256_srli_epi32(state[b], 20)); \ + state[a] = _mm256_add_epi32(state[a], state[b]); \ + state[d] = _mm256_xor_si256(state[d], state[a]); \ + state[d] = _mm256_or_si256( \ + _mm256_slli_epi32(state[d], 8), \ + _mm256_srli_epi32(state[d], 24)); \ + state[c] = _mm256_add_epi32(state[c], state[d]); \ + state[b] = _mm256_xor_si256(state[b], state[c]); \ + state[b] = _mm256_or_si256( \ + _mm256_slli_epi32(state[b], 7), \ + _mm256_srli_epi32(state[b], 25)); \ + } while (0) + + QROUND( 0, 4, 8, 12); + QROUND( 1, 5, 9, 13); + QROUND( 2, 6, 10, 14); + QROUND( 3, 7, 11, 15); + QROUND( 0, 5, 10, 15); + QROUND( 1, 6, 11, 12); + QROUND( 2, 7, 8, 13); + QROUND( 3, 4, 9, 14); + +#undef QROUND + + } + + /* + * Add initial state back and encode the result in the destination + * buffer. We can dump the AVX2 values "as is" because the non-AVX2 + * code uses a compatible order of values. 
+ */ + for (u = 0; u < 16; u ++) { + _mm256_storeu_si256((__m256i *)&p->buf.d[u << 5], + _mm256_add_epi32(state[u], init[u])); + } + + + p->ptr = 0; +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_prng_get_bytes(prng *p, void *dst, size_t len) { + uint8_t *buf; + + buf = dst; + while (len > 0) { + size_t clen; + + clen = (sizeof p->buf.d) - p->ptr; + if (clen > len) { + clen = len; + } + memcpy(buf, p->buf.d, clen); + buf += clen; + len -= clen; + p->ptr += clen; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON512_AVX2_prng_refill(p); + } + } +} diff --git a/crypto_sign/falcon/falcon-512/avx2/sign.c b/crypto_sign/falcon/falcon-512/avx2/sign.c new file mode 100644 index 00000000..1b6cad3f --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/sign.c @@ -0,0 +1,1312 @@ +#include "inner.h" + +/* + * Falcon signature generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* =================================================================== */ + +/* + * Compute degree N from logarithm 'logn'. + */ +#define MKN(logn) ((size_t)1 << (logn)) + +/* =================================================================== */ +/* + * Binary case: + * N = 2^logn + * phi = X^N+1 + */ + +/* + * Get the size of the LDL tree for an input with polynomials of size + * 2^logn. The size is expressed in the number of elements. + */ +static inline unsigned +ffLDL_treesize(unsigned logn) { + /* + * For logn = 0 (polynomials are constant), the "tree" is a + * single element. Otherwise, the tree node has size 2^logn, and + * has two child trees for size logn-1 each. Thus, treesize s() + * must fulfill these two relations: + * + * s(0) = 1 + * s(logn) = (2^logn) + 2*s(logn-1) + */ + return (logn + 1) << logn; +} + +/* + * Inner function for ffLDL_fft(). It expects the matrix to be both + * auto-adjoint and quasicyclic; also, it uses the source operands + * as modifiable temporaries. + * + * tmp[] must have room for at least one polynomial. + */ +static void +ffLDL_fft_inner(fpr *tree, + fpr *g0, fpr *g1, unsigned logn, fpr *tmp) { + size_t n, hn; + + n = MKN(logn); + if (n == 1) { + tree[0] = g0[0]; + return; + } + hn = n >> 1; + + /* + * The LDL decomposition yields L (which is written in the tree) + * and the diagonal of D. Since d00 = g0, we just write d11 + * into tmp. + */ + PQCLEAN_FALCON512_AVX2_poly_LDLmv_fft(tmp, tree, g0, g1, g0, logn); + + /* + * Split d00 (currently in g0) and d11 (currently in tmp). 
We + * reuse g0 and g1 as temporary storage spaces: + * d00 splits into g1, g1+hn + * d11 splits into g0, g0+hn + */ + PQCLEAN_FALCON512_AVX2_poly_split_fft(g1, g1 + hn, g0, logn); + PQCLEAN_FALCON512_AVX2_poly_split_fft(g0, g0 + hn, tmp, logn); + + /* + * Each split result is the first row of a new auto-adjoint + * quasicyclic matrix for the next recursive step. + */ + ffLDL_fft_inner(tree + n, + g1, g1 + hn, logn - 1, tmp); + ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1), + g0, g0 + hn, logn - 1, tmp); +} + +/* + * Compute the ffLDL tree of an auto-adjoint matrix G. The matrix + * is provided as three polynomials (FFT representation). + * + * The "tree" array is filled with the computed tree, of size + * (logn+1)*(2^logn) elements (see ffLDL_treesize()). + * + * Input arrays MUST NOT overlap, except possibly the three unmodified + * arrays g00, g01 and g11. tmp[] should have room for at least three + * polynomials of 2^logn elements each. + */ +static void +ffLDL_fft(fpr *tree, const fpr *g00, + const fpr *g01, const fpr *g11, + unsigned logn, fpr *tmp) { + size_t n, hn; + fpr *d00, *d11; + + n = MKN(logn); + if (n == 1) { + tree[0] = g00[0]; + return; + } + hn = n >> 1; + d00 = tmp; + d11 = tmp + n; + tmp += n << 1; + + memcpy(d00, g00, n * sizeof * g00); + PQCLEAN_FALCON512_AVX2_poly_LDLmv_fft(d11, tree, g00, g01, g11, logn); + + PQCLEAN_FALCON512_AVX2_poly_split_fft(tmp, tmp + hn, d00, logn); + PQCLEAN_FALCON512_AVX2_poly_split_fft(d00, d00 + hn, d11, logn); + memcpy(d11, tmp, n * sizeof * tmp); + ffLDL_fft_inner(tree + n, + d11, d11 + hn, logn - 1, tmp); + ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1), + d00, d00 + hn, logn - 1, tmp); +} + +/* + * Normalize an ffLDL tree: each leaf of value x is replaced with + * sigma / sqrt(x). + */ +static void +ffLDL_binary_normalize(fpr *tree, unsigned logn) { + /* + * TODO: make an iterative version. 
+ */ + size_t n; + + n = MKN(logn); + if (n == 1) { + /* + * We actually store in the tree leaf the inverse of + * the value mandated by the specification: this + * saves a division both here and in the sampler. + */ + tree[0] = fpr_mul(fpr_sqrt(tree[0]), fpr_inv_sigma); + } else { + ffLDL_binary_normalize(tree + n, logn - 1); + ffLDL_binary_normalize(tree + n + ffLDL_treesize(logn - 1), + logn - 1); + } +} + +/* =================================================================== */ + +/* + * Convert an integer polynomial (with small values) into the + * representation with complex numbers. + */ +static void +smallints_to_fpr(fpr *r, const int8_t *t, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + r[u] = fpr_of(t[u]); + } +} + +/* + * The expanded private key contains: + * - The B0 matrix (four elements) + * - The ffLDL tree + */ + +static inline size_t +skoff_b00(unsigned logn) { + (void)logn; + return 0; +} + +static inline size_t +skoff_b01(unsigned logn) { + return MKN(logn); +} + +static inline size_t +skoff_b10(unsigned logn) { + return 2 * MKN(logn); +} + +static inline size_t +skoff_b11(unsigned logn) { + return 3 * MKN(logn); +} + +static inline size_t +skoff_tree(unsigned logn) { + return 4 * MKN(logn); +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_expand_privkey(fpr *expanded_key, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + unsigned logn, uint8_t *tmp) { + size_t n; + fpr *rf, *rg, *rF, *rG; + fpr *b00, *b01, *b10, *b11; + fpr *g00, *g01, *g11, *gxx; + fpr *tree; + + n = MKN(logn); + b00 = expanded_key + skoff_b00(logn); + b01 = expanded_key + skoff_b01(logn); + b10 = expanded_key + skoff_b10(logn); + b11 = expanded_key + skoff_b11(logn); + tree = expanded_key + skoff_tree(logn); + + /* + * We load the private key elements directly into the B0 matrix, + * since B0 = [[g, -f], [G, -F]]. 
+ */ + rf = b01; + rg = b00; + rF = b11; + rG = b10; + + smallints_to_fpr(rf, f, logn); + smallints_to_fpr(rg, g, logn); + smallints_to_fpr(rF, F, logn); + smallints_to_fpr(rG, G, logn); + + /* + * Compute the FFT for the key elements, and negate f and F. + */ + PQCLEAN_FALCON512_AVX2_FFT(rf, logn); + PQCLEAN_FALCON512_AVX2_FFT(rg, logn); + PQCLEAN_FALCON512_AVX2_FFT(rF, logn); + PQCLEAN_FALCON512_AVX2_FFT(rG, logn); + PQCLEAN_FALCON512_AVX2_poly_neg(rf, logn); + PQCLEAN_FALCON512_AVX2_poly_neg(rF, logn); + + /* + * The Gram matrix is G = B·B*. Formulas are: + * g00 = b00*adj(b00) + b01*adj(b01) + * g01 = b00*adj(b10) + b01*adj(b11) + * g10 = b10*adj(b00) + b11*adj(b01) + * g11 = b10*adj(b10) + b11*adj(b11) + * + * For historical reasons, this implementation uses + * g00, g01 and g11 (upper triangle). + */ + g00 = (fpr *)tmp; + g01 = g00 + n; + g11 = g01 + n; + gxx = g11 + n; + + memcpy(g00, b00, n * sizeof * b00); + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(g00, logn); + memcpy(gxx, b01, n * sizeof * b01); + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(gxx, logn); + PQCLEAN_FALCON512_AVX2_poly_add(g00, gxx, logn); + + memcpy(g01, b00, n * sizeof * b00); + PQCLEAN_FALCON512_AVX2_poly_muladj_fft(g01, b10, logn); + memcpy(gxx, b01, n * sizeof * b01); + PQCLEAN_FALCON512_AVX2_poly_muladj_fft(gxx, b11, logn); + PQCLEAN_FALCON512_AVX2_poly_add(g01, gxx, logn); + + memcpy(g11, b10, n * sizeof * b10); + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(g11, logn); + memcpy(gxx, b11, n * sizeof * b11); + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(gxx, logn); + PQCLEAN_FALCON512_AVX2_poly_add(g11, gxx, logn); + + /* + * Compute the Falcon tree. + */ + ffLDL_fft(tree, g00, g01, g11, logn, gxx); + + /* + * Normalize tree. + */ + ffLDL_binary_normalize(tree, logn); +} + +typedef int (*samplerZ)(void *ctx, fpr mu, fpr sigma); + +/* + * Perform Fast Fourier Sampling for target vector t. The Gram matrix + * is provided (G = [[g00, g01], [adj(g01), g11]]). 
The sampled vector + * is written over (t0,t1). The Gram matrix is modified as well. The + * tmp[] buffer must have room for four polynomials. + */ +static void +ffSampling_fft_dyntree(samplerZ samp, void *samp_ctx, + fpr *t0, fpr *t1, + fpr *g00, fpr *g01, fpr *g11, + unsigned logn, fpr *tmp) { + size_t n, hn; + fpr *z0, *z1; + + /* + * Deepest level: the LDL tree leaf value is just g00 (the + * array has length only 1 at this point); we normalize it + * with regards to sigma, then use it for sampling. + */ + if (logn == 0) { + fpr leaf; + + leaf = g00[0]; + leaf = fpr_mul(fpr_sqrt(leaf), fpr_inv_sigma); + t0[0] = fpr_of(samp(samp_ctx, t0[0], leaf)); + t1[0] = fpr_of(samp(samp_ctx, t1[0], leaf)); + return; + } + + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Decompose G into LDL. We only need d00 (identical to g00), + * d11, and l10; we do that in place. + */ + PQCLEAN_FALCON512_AVX2_poly_LDL_fft(g00, g01, g11, logn); + + /* + * Split d00 and d11 and expand them into half-size quasi-cyclic + * Gram matrices. We also save l10 in tmp[]. + */ + PQCLEAN_FALCON512_AVX2_poly_split_fft(tmp, tmp + hn, g00, logn); + memcpy(g00, tmp, n * sizeof * tmp); + PQCLEAN_FALCON512_AVX2_poly_split_fft(tmp, tmp + hn, g11, logn); + memcpy(g11, tmp, n * sizeof * tmp); + memcpy(tmp, g01, n * sizeof * g01); + memcpy(g01, g00, hn * sizeof * g00); + memcpy(g01 + hn, g11, hn * sizeof * g00); + + /* + * The half-size Gram matrices for the recursive LDL tree + * building are now: + * - left sub-tree: g00, g00+hn, g01 + * - right sub-tree: g11, g11+hn, g01+hn + * l10 is in tmp[]. + */ + + /* + * We split t1 and use the first recursive call on the two + * halves, using the right sub-tree. The result is merged + * back into tmp + 2*n. 
+ */ + z1 = tmp + n; + PQCLEAN_FALCON512_AVX2_poly_split_fft(z1, z1 + hn, t1, logn); + ffSampling_fft_dyntree(samp, samp_ctx, z1, z1 + hn, + g11, g11 + hn, g01 + hn, logn - 1, z1 + n); + PQCLEAN_FALCON512_AVX2_poly_merge_fft(tmp + (n << 1), z1, z1 + hn, logn); + + /* + * Compute tb0 = t0 + (t1 - z1) * l10. + * At that point, l10 is in tmp, t1 is unmodified, and z1 is + * in tmp + (n << 1). The buffer in z1 is free. + * + * In the end, z1 is written over t1, and tb0 is in t0. + */ + memcpy(z1, t1, n * sizeof * t1); + PQCLEAN_FALCON512_AVX2_poly_sub(z1, tmp + (n << 1), logn); + memcpy(t1, tmp + (n << 1), n * sizeof * tmp); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(tmp, z1, logn); + PQCLEAN_FALCON512_AVX2_poly_add(t0, tmp, logn); + + /* + * Second recursive invocation, on the split tb0 (currently in t0) + * and the left sub-tree. + */ + z0 = tmp; + PQCLEAN_FALCON512_AVX2_poly_split_fft(z0, z0 + hn, t0, logn); + ffSampling_fft_dyntree(samp, samp_ctx, z0, z0 + hn, + g00, g00 + hn, g01, logn - 1, z0 + n); + PQCLEAN_FALCON512_AVX2_poly_merge_fft(t0, z0, z0 + hn, logn); +} + +/* + * Perform Fast Fourier Sampling for target vector t and LDL tree T. + * tmp[] must have size for at least two polynomials of size 2^logn. + */ +static void +ffSampling_fft(samplerZ samp, void *samp_ctx, + fpr *z0, fpr *z1, + const fpr *tree, + const fpr *t0, const fpr *t1, unsigned logn, + fpr *tmp) { + size_t n, hn; + const fpr *tree0, *tree1; + + /* + * When logn == 2, we inline the last two recursion levels. 
+ */ + if (logn == 2) { + fpr w0, w1, w2, w3, sigma; + __m128d ww0, ww1, wa, wb, wc, wd; + __m128d wy0, wy1, wz0, wz1; + __m128d half, invsqrt8, invsqrt2, neghi, neglo; + int si0, si1, si2, si3; + + tree0 = tree + 4; + tree1 = tree + 8; + + half = _mm_set1_pd(0.5); + invsqrt8 = _mm_set1_pd(0.353553390593273762200422181052); + invsqrt2 = _mm_set1_pd(0.707106781186547524400844362105); + neghi = _mm_set_pd(-0.0, 0.0); + neglo = _mm_set_pd(0.0, -0.0); + + /* + * We split t1 into w*, then do the recursive invocation, + * with output in w*. We finally merge back into z1. + */ + ww0 = _mm_loadu_pd(&t1[0].v); + ww1 = _mm_loadu_pd(&t1[2].v); + wa = _mm_unpacklo_pd(ww0, ww1); + wb = _mm_unpackhi_pd(ww0, ww1); + wc = _mm_add_pd(wa, wb); + ww0 = _mm_mul_pd(wc, half); + wc = _mm_sub_pd(wa, wb); + wd = _mm_xor_pd(_mm_permute_pd(wc, 1), neghi); + ww1 = _mm_mul_pd(_mm_add_pd(wc, wd), invsqrt8); + + w2.v = _mm_cvtsd_f64(ww1); + w3.v = _mm_cvtsd_f64(_mm_permute_pd(ww1, 1)); + wa = ww1; + sigma = tree1[3]; + si2 = samp(samp_ctx, w2, sigma); + si3 = samp(samp_ctx, w3, sigma); + ww1 = _mm_set_pd((double)si3, (double)si2); + wa = _mm_sub_pd(wa, ww1); + wb = _mm_loadu_pd(&tree1[0].v); + wc = _mm_mul_pd(wa, wb); + wd = _mm_mul_pd(wa, _mm_permute_pd(wb, 1)); + wa = _mm_unpacklo_pd(wc, wd); + wb = _mm_unpackhi_pd(wc, wd); + ww0 = _mm_add_pd(ww0, _mm_add_pd(wa, _mm_xor_pd(wb, neglo))); + w0.v = _mm_cvtsd_f64(ww0); + w1.v = _mm_cvtsd_f64(_mm_permute_pd(ww0, 1)); + sigma = tree1[2]; + si0 = samp(samp_ctx, w0, sigma); + si1 = samp(samp_ctx, w1, sigma); + ww0 = _mm_set_pd((double)si1, (double)si0); + + wc = _mm_mul_pd( + _mm_set_pd((double)(si2 + si3), (double)(si2 - si3)), + invsqrt2); + wa = _mm_add_pd(ww0, wc); + wb = _mm_sub_pd(ww0, wc); + ww0 = _mm_unpacklo_pd(wa, wb); + ww1 = _mm_unpackhi_pd(wa, wb); + _mm_storeu_pd(&z1[0].v, ww0); + _mm_storeu_pd(&z1[2].v, ww1); + + /* + * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*. 
+ */ + wy0 = _mm_sub_pd(_mm_loadu_pd(&t1[0].v), ww0); + wy1 = _mm_sub_pd(_mm_loadu_pd(&t1[2].v), ww1); + wz0 = _mm_loadu_pd(&tree[0].v); + wz1 = _mm_loadu_pd(&tree[2].v); + ww0 = _mm_sub_pd(_mm_mul_pd(wy0, wz0), _mm_mul_pd(wy1, wz1)); + ww1 = _mm_add_pd(_mm_mul_pd(wy0, wz1), _mm_mul_pd(wy1, wz0)); + ww0 = _mm_add_pd(ww0, _mm_loadu_pd(&t0[0].v)); + ww1 = _mm_add_pd(ww1, _mm_loadu_pd(&t0[2].v)); + + /* + * Second recursive invocation. + */ + wa = _mm_unpacklo_pd(ww0, ww1); + wb = _mm_unpackhi_pd(ww0, ww1); + wc = _mm_add_pd(wa, wb); + ww0 = _mm_mul_pd(wc, half); + wc = _mm_sub_pd(wa, wb); + wd = _mm_xor_pd(_mm_permute_pd(wc, 1), neghi); + ww1 = _mm_mul_pd(_mm_add_pd(wc, wd), invsqrt8); + + w2.v = _mm_cvtsd_f64(ww1); + w3.v = _mm_cvtsd_f64(_mm_permute_pd(ww1, 1)); + wa = ww1; + sigma = tree0[3]; + si2 = samp(samp_ctx, w2, sigma); + si3 = samp(samp_ctx, w3, sigma); + ww1 = _mm_set_pd((double)si3, (double)si2); + wa = _mm_sub_pd(wa, ww1); + wb = _mm_loadu_pd(&tree0[0].v); + wc = _mm_mul_pd(wa, wb); + wd = _mm_mul_pd(wa, _mm_permute_pd(wb, 1)); + wa = _mm_unpacklo_pd(wc, wd); + wb = _mm_unpackhi_pd(wc, wd); + ww0 = _mm_add_pd(ww0, _mm_add_pd(wa, _mm_xor_pd(wb, neglo))); + w0.v = _mm_cvtsd_f64(ww0); + w1.v = _mm_cvtsd_f64(_mm_permute_pd(ww0, 1)); + sigma = tree0[2]; + si0 = samp(samp_ctx, w0, sigma); + si1 = samp(samp_ctx, w1, sigma); + ww0 = _mm_set_pd((double)si1, (double)si0); + + wc = _mm_mul_pd( + _mm_set_pd((double)(si2 + si3), (double)(si2 - si3)), + invsqrt2); + wa = _mm_add_pd(ww0, wc); + wb = _mm_sub_pd(ww0, wc); + ww0 = _mm_unpacklo_pd(wa, wb); + ww1 = _mm_unpackhi_pd(wa, wb); + _mm_storeu_pd(&z0[0].v, ww0); + _mm_storeu_pd(&z0[2].v, ww1); + + return; + } + + /* + * Case logn == 1 is reachable only when using Falcon-2 (the + * smallest size for which Falcon is mathematically defined, but + * of course way too insecure to be of any use). 
+ */ + if (logn == 1) { + fpr x0, x1, y0, y1, sigma; + fpr a_re, a_im, b_re, b_im, c_re, c_im; + + x0 = t1[0]; + x1 = t1[1]; + sigma = tree[3]; + z1[0] = y0 = fpr_of(samp(samp_ctx, x0, sigma)); + z1[1] = y1 = fpr_of(samp(samp_ctx, x1, sigma)); + a_re = fpr_sub(x0, y0); + a_im = fpr_sub(x1, y1); + b_re = tree[0]; + b_im = tree[1]; + c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im)); + c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re)); + x0 = fpr_add(c_re, t0[0]); + x1 = fpr_add(c_im, t0[1]); + sigma = tree[2]; + z0[0] = fpr_of(samp(samp_ctx, x0, sigma)); + z0[1] = fpr_of(samp(samp_ctx, x1, sigma)); + + return; + } + + /* + * Normal end of recursion is for logn == 0. Since the last + * steps of the recursions were inlined in the blocks above + * (when logn == 1 or 2), this case is not reachable, and is + * retained here only for documentation purposes. + + if (logn == 0) { + fpr x0, x1, sigma; + + x0 = t0[0]; + x1 = t1[0]; + sigma = tree[0]; + z0[0] = fpr_of(samp(samp_ctx, x0, sigma)); + z1[0] = fpr_of(samp(samp_ctx, x1, sigma)); + return; + } + + */ + + /* + * General recursive case (logn >= 3). + */ + + n = (size_t)1 << logn; + hn = n >> 1; + tree0 = tree + n; + tree1 = tree + n + ffLDL_treesize(logn - 1); + + /* + * We split t1 into z1 (reused as temporary storage), then do + * the recursive invocation, with output in tmp. We finally + * merge back into z1. + */ + PQCLEAN_FALCON512_AVX2_poly_split_fft(z1, z1 + hn, t1, logn); + ffSampling_fft(samp, samp_ctx, tmp, tmp + hn, + tree1, z1, z1 + hn, logn - 1, tmp + n); + PQCLEAN_FALCON512_AVX2_poly_merge_fft(z1, tmp, tmp + hn, logn); + + /* + * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in tmp[]. + */ + memcpy(tmp, t1, n * sizeof * t1); + PQCLEAN_FALCON512_AVX2_poly_sub(tmp, z1, logn); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(tmp, tree, logn); + PQCLEAN_FALCON512_AVX2_poly_add(tmp, t0, logn); + + /* + * Second recursive invocation. 
+ */ + PQCLEAN_FALCON512_AVX2_poly_split_fft(z0, z0 + hn, tmp, logn); + ffSampling_fft(samp, samp_ctx, tmp, tmp + hn, + tree0, z0, z0 + hn, logn - 1, tmp + n); + PQCLEAN_FALCON512_AVX2_poly_merge_fft(z0, tmp, tmp + hn, logn); +} + +/* + * Compute a signature: the signature contains two vectors, s1 and s2. + * The s1 vector is not returned. The squared norm of (s1,s2) is + * computed, and if it is short enough, then s2 is returned into the + * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is + * returned; the caller should then try again. This function uses an + * expanded key. + * + * tmp[] must have room for at least six polynomials. + */ +static int +do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2, + const fpr *expanded_key, + const uint16_t *hm, + unsigned logn, fpr *tmp) { + size_t n, u; + fpr *t0, *t1, *tx, *ty; + const fpr *b00, *b01, *b10, *b11, *tree; + fpr ni; + uint32_t sqn, ng; + int16_t *s1tmp, *s2tmp; + + n = MKN(logn); + t0 = tmp; + t1 = t0 + n; + b00 = expanded_key + skoff_b00(logn); + b01 = expanded_key + skoff_b01(logn); + b10 = expanded_key + skoff_b10(logn); + b11 = expanded_key + skoff_b11(logn); + tree = expanded_key + skoff_tree(logn); + + /* + * Set the target vector to [hm, 0] (hm is the hashed message). + */ + for (u = 0; u < n; u ++) { + t0[u] = fpr_of(hm[u]); + /* This is implicit. + t1[u] = fpr_zero; + */ + } + + /* + * Apply the lattice basis to obtain the real target + * vector (after normalization with regards to modulus). + */ + PQCLEAN_FALCON512_AVX2_FFT(t0, logn); + ni = fpr_inverse_of_q; + memcpy(t1, t0, n * sizeof * t0); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(t1, b01, logn); + PQCLEAN_FALCON512_AVX2_poly_mulconst(t1, fpr_neg(ni), logn); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(t0, b11, logn); + PQCLEAN_FALCON512_AVX2_poly_mulconst(t0, ni, logn); + + tx = t1 + n; + ty = tx + n; + + /* + * Apply sampling. Output is written back in [tx, ty]. 
+ */ + ffSampling_fft(samp, samp_ctx, tx, ty, tree, t0, t1, logn, ty + n); + + /* + * Get the lattice point corresponding to that tiny vector. + */ + memcpy(t0, tx, n * sizeof * tx); + memcpy(t1, ty, n * sizeof * ty); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(tx, b00, logn); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(ty, b10, logn); + PQCLEAN_FALCON512_AVX2_poly_add(tx, ty, logn); + memcpy(ty, t0, n * sizeof * t0); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(ty, b01, logn); + + memcpy(t0, tx, n * sizeof * tx); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(t1, b11, logn); + PQCLEAN_FALCON512_AVX2_poly_add(t1, ty, logn); + + PQCLEAN_FALCON512_AVX2_iFFT(t0, logn); + PQCLEAN_FALCON512_AVX2_iFFT(t1, logn); + + /* + * Compute the signature. + */ + s1tmp = (int16_t *)tx; + sqn = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]); + sqn += (uint32_t)(z * z); + ng |= sqn; + s1tmp[u] = (int16_t)z; + } + sqn |= -(ng >> 31); + + /* + * With "normal" degrees (e.g. 512 or 1024), it is very + * improbable that the computed vector is not short enough; + * however, it may happen in practice for the very reduced + * versions (e.g. degree 16 or below). In that case, the caller + * will loop, and we must not write anything into s2[] because + * s2[] may overlap with the hashed message hm[] and we need + * hm[] for the next iteration. + */ + s2tmp = (int16_t *)tmp; + for (u = 0; u < n; u ++) { + s2tmp[u] = (int16_t) - fpr_rint(t1[u]); + } + if (PQCLEAN_FALCON512_AVX2_is_short_half(sqn, s2tmp, logn)) { + memcpy(s2, s2tmp, n * sizeof * s2); + memcpy(tmp, s1tmp, n * sizeof * s1tmp); + return 1; + } + return 0; +} + +/* + * Compute a signature: the signature contains two vectors, s1 and s2. + * The s1 vector is not returned. The squared norm of (s1,s2) is + * computed, and if it is short enough, then s2 is returned into the + * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is + * returned; the caller should then try again. 
+ * + * tmp[] must have room for at least nine polynomials. + */ +static int +do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, fpr *tmp) { + size_t n, u; + fpr *t0, *t1, *tx, *ty; + fpr *b00, *b01, *b10, *b11, *g00, *g01, *g11; + fpr ni; + uint32_t sqn, ng; + int16_t *s1tmp, *s2tmp; + + n = MKN(logn); + + /* + * Lattice basis is B = [[g, -f], [G, -F]]. We convert it to FFT. + */ + b00 = tmp; + b01 = b00 + n; + b10 = b01 + n; + b11 = b10 + n; + smallints_to_fpr(b01, f, logn); + smallints_to_fpr(b00, g, logn); + smallints_to_fpr(b11, F, logn); + smallints_to_fpr(b10, G, logn); + PQCLEAN_FALCON512_AVX2_FFT(b01, logn); + PQCLEAN_FALCON512_AVX2_FFT(b00, logn); + PQCLEAN_FALCON512_AVX2_FFT(b11, logn); + PQCLEAN_FALCON512_AVX2_FFT(b10, logn); + PQCLEAN_FALCON512_AVX2_poly_neg(b01, logn); + PQCLEAN_FALCON512_AVX2_poly_neg(b11, logn); + + /* + * Compute the Gram matrix G = B·B*. Formulas are: + * g00 = b00*adj(b00) + b01*adj(b01) + * g01 = b00*adj(b10) + b01*adj(b11) + * g10 = b10*adj(b00) + b11*adj(b01) + * g11 = b10*adj(b10) + b11*adj(b11) + * + * For historical reasons, this implementation uses + * g00, g01 and g11 (upper triangle). g10 is not kept + * since it is equal to adj(g01). + * + * We _replace_ the matrix B with the Gram matrix, but we + * must keep b01 and b11 for computing the target vector. 
+ */ + t0 = b11 + n; + t1 = t0 + n; + + memcpy(t0, b01, n * sizeof * b01); + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(t0, logn); // t0 <- b01*adj(b01) + + memcpy(t1, b00, n * sizeof * b00); + PQCLEAN_FALCON512_AVX2_poly_muladj_fft(t1, b10, logn); // t1 <- b00*adj(b10) + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(b00, logn); // b00 <- b00*adj(b00) + PQCLEAN_FALCON512_AVX2_poly_add(b00, t0, logn); // b00 <- g00 + memcpy(t0, b01, n * sizeof * b01); + PQCLEAN_FALCON512_AVX2_poly_muladj_fft(b01, b11, logn); // b01 <- b01*adj(b11) + PQCLEAN_FALCON512_AVX2_poly_add(b01, t1, logn); // b01 <- g01 + + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(b10, logn); // b10 <- b10*adj(b10) + memcpy(t1, b11, n * sizeof * b11); + PQCLEAN_FALCON512_AVX2_poly_mulselfadj_fft(t1, logn); // t1 <- b11*adj(b11) + PQCLEAN_FALCON512_AVX2_poly_add(b10, t1, logn); // b10 <- g11 + + /* + * We rename variables to make things clearer. The three elements + * of the Gram matrix uses the first 3*n slots of tmp[], followed + * by b11 and b01 (in that order). + */ + g00 = b00; + g01 = b01; + g11 = b10; + b01 = t0; + t0 = b01 + n; + t1 = t0 + n; + + /* + * Memory layout at that point: + * g00 g01 g11 b11 b01 t0 t1 + */ + + /* + * Set the target vector to [hm, 0] (hm is the hashed message). + */ + for (u = 0; u < n; u ++) { + t0[u] = fpr_of(hm[u]); + /* This is implicit. + t1[u] = fpr_zero; + */ + } + + /* + * Apply the lattice basis to obtain the real target + * vector (after normalization with regards to modulus). + */ + PQCLEAN_FALCON512_AVX2_FFT(t0, logn); + ni = fpr_inverse_of_q; + memcpy(t1, t0, n * sizeof * t0); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(t1, b01, logn); + PQCLEAN_FALCON512_AVX2_poly_mulconst(t1, fpr_neg(ni), logn); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(t0, b11, logn); + PQCLEAN_FALCON512_AVX2_poly_mulconst(t0, ni, logn); + + /* + * b01 and b11 can be discarded, so we move back (t0,t1). 
+ * Memory layout is now: + * g00 g01 g11 t0 t1 + */ + memcpy(b11, t0, n * 2 * sizeof * t0); + t0 = g11 + n; + t1 = t0 + n; + + /* + * Apply sampling; result is written over (t0,t1). + */ + ffSampling_fft_dyntree(samp, samp_ctx, + t0, t1, g00, g01, g11, logn, t1 + n); + + /* + * We arrange the layout back to: + * b00 b01 b10 b11 t0 t1 + * + * We did not conserve the matrix basis, so we must recompute + * it now. + */ + b00 = tmp; + b01 = b00 + n; + b10 = b01 + n; + b11 = b10 + n; + memmove(b11 + n, t0, n * 2 * sizeof * t0); + t0 = b11 + n; + t1 = t0 + n; + smallints_to_fpr(b01, f, logn); + smallints_to_fpr(b00, g, logn); + smallints_to_fpr(b11, F, logn); + smallints_to_fpr(b10, G, logn); + PQCLEAN_FALCON512_AVX2_FFT(b01, logn); + PQCLEAN_FALCON512_AVX2_FFT(b00, logn); + PQCLEAN_FALCON512_AVX2_FFT(b11, logn); + PQCLEAN_FALCON512_AVX2_FFT(b10, logn); + PQCLEAN_FALCON512_AVX2_poly_neg(b01, logn); + PQCLEAN_FALCON512_AVX2_poly_neg(b11, logn); + tx = t1 + n; + ty = tx + n; + + /* + * Get the lattice point corresponding to that tiny vector. + */ + memcpy(tx, t0, n * sizeof * t0); + memcpy(ty, t1, n * sizeof * t1); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(tx, b00, logn); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(ty, b10, logn); + PQCLEAN_FALCON512_AVX2_poly_add(tx, ty, logn); + memcpy(ty, t0, n * sizeof * t0); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(ty, b01, logn); + + memcpy(t0, tx, n * sizeof * tx); + PQCLEAN_FALCON512_AVX2_poly_mul_fft(t1, b11, logn); + PQCLEAN_FALCON512_AVX2_poly_add(t1, ty, logn); + PQCLEAN_FALCON512_AVX2_iFFT(t0, logn); + PQCLEAN_FALCON512_AVX2_iFFT(t1, logn); + + s1tmp = (int16_t *)tx; + sqn = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]); + sqn += (uint32_t)(z * z); + ng |= sqn; + s1tmp[u] = (int16_t)z; + } + sqn |= -(ng >> 31); + + /* + * With "normal" degrees (e.g. 
512 or 1024), it is very + * improbable that the computed vector is not short enough; + * however, it may happen in practice for the very reduced + * versions (e.g. degree 16 or below). In that case, the caller + * will loop, and we must not write anything into s2[] because + * s2[] may overlap with the hashed message hm[] and we need + * hm[] for the next iteration. + */ + s2tmp = (int16_t *)tmp; + for (u = 0; u < n; u ++) { + s2tmp[u] = (int16_t) - fpr_rint(t1[u]); + } + if (PQCLEAN_FALCON512_AVX2_is_short_half(sqn, s2tmp, logn)) { + memcpy(s2, s2tmp, n * sizeof * s2); + memcpy(tmp, s1tmp, n * sizeof * s1tmp); + return 1; + } + return 0; +} + +/* + * Sample an integer value along a half-gaussian distribution centered + * on zero and standard deviation 1.8205, with a precision of 72 bits. + */ +int +PQCLEAN_FALCON512_AVX2_gaussian0_sampler(prng *p) { + + /* + * High words. + */ + static const union { + uint16_t u16[16]; + __m256i ymm[1]; + } rhi15 = { + { + 0x51FB, 0x2A69, 0x113E, 0x0568, + 0x014A, 0x003B, 0x0008, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + } + }; + + static const union { + uint64_t u64[20]; + __m256i ymm[5]; + } rlo57 = { + { + 0x1F42ED3AC391802, 0x12B181F3F7DDB82, + 0x1CDD0934829C1FF, 0x1754377C7994AE4, + 0x1846CAEF33F1F6F, 0x14AC754ED74BD5F, + 0x024DD542B776AE4, 0x1A1FFDC65AD63DA, + 0x01F80D88A7B6428, 0x001C3FDB2040C69, + 0x00012CF24D031FB, 0x00000949F8B091F, + 0x0000003665DA998, 0x00000000EBF6EBB, + 0x0000000002F5D7E, 0x000000000007098, + 0x0000000000000C6, 0x000000000000001, + 0x000000000000000, 0x000000000000000 + } + }; + + uint64_t lo; + unsigned hi; + __m256i xhi, rhi, gthi, eqhi, eqm; + __m256i xlo, gtlo0, gtlo1, gtlo2, gtlo3, gtlo4; + __m128i t, zt; + int r; + + /* + * Get a 72-bit random value and split it into a low part + * (57 bits) and a high part (15 bits) + */ + lo = prng_get_u64(p); + hi = prng_get_u8(p); + hi = (hi << 7) | (unsigned)(lo >> 57); + lo &= 0x1FFFFFFFFFFFFFF; + + /* + * Broadcast the 
high part and compare it with the relevant + * values. We need both a "greater than" and an "equal" + * comparisons. + */ + xhi = _mm256_broadcastw_epi16(_mm_cvtsi32_si128((int32_t)hi)); + rhi = _mm256_loadu_si256(&rhi15.ymm[0]); + gthi = _mm256_cmpgt_epi16(rhi, xhi); + eqhi = _mm256_cmpeq_epi16(rhi, xhi); + + /* + * The result is the number of 72-bit values (among the list of 19) + * which are greater than the 72-bit random value. We first count + * all non-zero 16-bit elements in the first eight of gthi. Such + * elements have value -1 or 0, so we first negate them. + */ + t = _mm_srli_epi16(_mm256_castsi256_si128(gthi), 15); + zt = _mm_setzero_si128(); + t = _mm_hadd_epi16(t, zt); + t = _mm_hadd_epi16(t, zt); + t = _mm_hadd_epi16(t, zt); + r = _mm_cvtsi128_si32(t); + + /* + * We must look at the low bits for all values for which the + * high bits are an "equal" match; values 8-18 all have the + * same high bits (0). + * On 32-bit systems, 'lo' really is two registers, requiring + * some extra code. + */ + xlo = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(*(int64_t *)&lo)); + gtlo0 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[0]), xlo); + gtlo1 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[1]), xlo); + gtlo2 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[2]), xlo); + gtlo3 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[3]), xlo); + gtlo4 = _mm256_cmpgt_epi64(_mm256_loadu_si256(&rlo57.ymm[4]), xlo); + + /* + * Keep only comparison results that correspond to the non-zero + * elements in eqhi. + */ + gtlo0 = _mm256_and_si256(gtlo0, _mm256_cvtepi16_epi64( + _mm256_castsi256_si128(eqhi))); + gtlo1 = _mm256_and_si256(gtlo1, _mm256_cvtepi16_epi64( + _mm256_castsi256_si128(_mm256_bsrli_epi128(eqhi, 8)))); + eqm = _mm256_permute4x64_epi64(eqhi, 0xFF); + gtlo2 = _mm256_and_si256(gtlo2, eqm); + gtlo3 = _mm256_and_si256(gtlo3, eqm); + gtlo4 = _mm256_and_si256(gtlo4, eqm); + + /* + * Add all values to count the total number of "-1" elements. 
+ * Since the first eight "high" words are all different, only + * one element (at most) in gtlo0:gtlo1 can be non-zero; however, + * if the high word of the random value is zero, then many + * elements of gtlo2:gtlo3:gtlo4 can be non-zero. + */ + gtlo0 = _mm256_or_si256(gtlo0, gtlo1); + gtlo0 = _mm256_add_epi64( + _mm256_add_epi64(gtlo0, gtlo2), + _mm256_add_epi64(gtlo3, gtlo4)); + t = _mm_add_epi64( + _mm256_castsi256_si128(gtlo0), + _mm256_extracti128_si256(gtlo0, 1)); + t = _mm_add_epi64(t, _mm_srli_si128(t, 8)); + r -= _mm_cvtsi128_si32(t); + + return r; + +} + +/* + * Sample a bit with probability exp(-x) for some x >= 0. + */ +static int +BerExp(prng *p, fpr x, fpr ccs) { + int s, i; + fpr r; + uint32_t sw, w; + uint64_t z; + + /* + * Reduce x modulo log(2): x = s*log(2) + r, with s an integer, + * and 0 <= r < log(2). Since x >= 0, we can use fpr_trunc(). + */ + s = (int)fpr_trunc(fpr_mul(x, fpr_inv_log2)); + r = fpr_sub(x, fpr_mul(fpr_of(s), fpr_log2)); + + /* + * It may happen (quite rarely) that s >= 64; if sigma = 1.2 + * (the minimum value for sigma), r = 0 and b = 1, then we get + * s >= 64 if the half-Gaussian produced a z >= 13, which happens + * with probability about 0.000000000230383991, which is + * approximatively equal to 2^(-32). In any case, if s >= 64, + * then BerExp will be non-zero with probability less than + * 2^(-64), so we can simply saturate s at 63. + */ + sw = (uint32_t)s; + sw ^= (sw ^ 63) & -((63 - sw) >> 31); + s = (int)sw; + + /* + * Compute exp(-r); we know that 0 <= r < log(2) at this point, so + * we can use fpr_expm_p63(), which yields a result scaled to 2^63. + * We scale it up to 2^64, then right-shift it by s bits because + * we really want exp(-x) = 2^(-s)*exp(-r). + * + * The "-1" operation makes sure that the value fits on 64 bits + * (i.e. if r = 0, we may get 2^64, and we prefer 2^64-1 in that + * case). The bias is negligible since fpr_expm_p63() only computes + * with 51 bits of precision or so. 
+ */ + z = ((fpr_expm_p63(r, ccs) << 1) - 1) >> s; + + /* + * Sample a bit with probability exp(-x). Since x = s*log(2) + r, + * exp(-x) = 2^-s * exp(-r), we compare lazily exp(-x) with the + * PRNG output to limit its consumption, the sign of the difference + * yields the expected result. + */ + i = 64; + do { + i -= 8; + w = prng_get_u8(p) - ((uint32_t)(z >> i) & 0xFF); + } while (!w && i > 0); + return (int)(w >> 31); +} + +/* + * The sampler produces a random integer that follows a discrete Gaussian + * distribution, centered on mu, and with standard deviation sigma. The + * provided parameter isigma is equal to 1/sigma. + * + * The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between + * 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9. + */ +int +PQCLEAN_FALCON512_AVX2_sampler(void *ctx, fpr mu, fpr isigma) { + sampler_context *spc; + int s, z0, z, b; + fpr r, dss, ccs, x; + + spc = ctx; + + /* + * Center is mu. We compute mu = s + r where s is an integer + * and 0 <= r < 1. + */ + s = (int)fpr_floor(mu); + r = fpr_sub(mu, fpr_of(s)); + + /* + * dss = 1/(2*sigma^2) = 0.5*(isigma^2). + */ + dss = fpr_half(fpr_sqr(isigma)); + + /* + * ccs = sigma_min / sigma = sigma_min * isigma. + */ + ccs = fpr_mul(isigma, spc->sigma_min); + + /* + * We now need to sample on center r. + */ + for (;;) { + /* + * Sample z for a Gaussian distribution. Then get a + * random bit b to turn the sampling into a bimodal + * distribution: if b = 1, we use z+1, otherwise we + * use -z. We thus have two situations: + * + * - b = 1: z >= 1 and sampled against a Gaussian + * centered on 1. + * - b = 0: z <= 0 and sampled against a Gaussian + * centered on 0. + */ + z0 = PQCLEAN_FALCON512_AVX2_gaussian0_sampler(&spc->p); + b = (int)prng_get_u8(&spc->p) & 1; + z = b + ((b << 1) - 1) * z0; + + /* + * Rejection sampling. We want a Gaussian centered on r; + * but we sampled against a Gaussian centered on b (0 or + * 1). 
But we know that z is always in the range where + * our sampling distribution is greater than the Gaussian + * distribution, so rejection works. + * + * We got z with distribution: + * G(z) = exp(-((z-b)^2)/(2*sigma0^2)) + * We target distribution: + * S(z) = exp(-((z-r)^2)/(2*sigma^2)) + * Rejection sampling works by keeping the value z with + * probability S(z)/G(z), and starting again otherwise. + * This requires S(z) <= G(z), which is the case here. + * Thus, we simply need to keep our z with probability: + * P = exp(-x) + * where: + * x = ((z-r)^2)/(2*sigma^2) - ((z-b)^2)/(2*sigma0^2) + * + * Here, we scale up the Bernouilli distribution, which + * makes rejection more probable, but makes rejection + * rate sufficiently decorrelated from the Gaussian + * center and standard deviation that the whole sampler + * can be said to be constant-time. + */ + x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z), r)), dss); + x = fpr_sub(x, fpr_mul(fpr_of(z0 * z0), fpr_inv_2sqrsigma0)); + if (BerExp(&spc->p, x, ccs)) { + /* + * Rejection sampling was centered on r, but the + * actual center is mu = s + r. + */ + return s + z; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp) { + fpr *ftmp; + + ftmp = (fpr *)tmp; + for (;;) { + /* + * Signature produces short vectors s1 and s2. The + * signature is acceptable only if the aggregate vector + * s1,s2 is short; we must use the same bound as the + * verifier. + * + * If the signature is acceptable, then we return only s2 + * (the verifier recomputes s1 from s2, the hashed message, + * and the public key). + */ + sampler_context spc; + samplerZ samp; + void *samp_ctx; + + /* + * Normal sampling. We use a fast PRNG seeded from our + * SHAKE context ('rng'). 
+ */ + if (logn == 10) { + spc.sigma_min = fpr_sigma_min_10; + } else { + spc.sigma_min = fpr_sigma_min_9; + } + PQCLEAN_FALCON512_AVX2_prng_init(&spc.p, rng); + samp = PQCLEAN_FALCON512_AVX2_sampler; + samp_ctx = &spc; + + /* + * Do the actual signature. + */ + if (do_sign_tree(samp, samp_ctx, sig, + expanded_key, hm, logn, ftmp)) { + break; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp) { + fpr *ftmp; + + ftmp = (fpr *)tmp; + for (;;) { + /* + * Signature produces short vectors s1 and s2. The + * signature is acceptable only if the aggregate vector + * s1,s2 is short; we must use the same bound as the + * verifier. + * + * If the signature is acceptable, then we return only s2 + * (the verifier recomputes s1 from s2, the hashed message, + * and the public key). + */ + sampler_context spc; + samplerZ samp; + void *samp_ctx; + + /* + * Normal sampling. We use a fast PRNG seeded from our + * SHAKE context ('rng'). + */ + if (logn == 10) { + spc.sigma_min = fpr_sigma_min_10; + } else { + spc.sigma_min = fpr_sigma_min_9; + } + PQCLEAN_FALCON512_AVX2_prng_init(&spc.p, rng); + samp = PQCLEAN_FALCON512_AVX2_sampler; + samp_ctx = &spc; + + /* + * Do the actual signature. + */ + if (do_sign_dyn(samp, samp_ctx, sig, + f, g, F, G, hm, logn, ftmp)) { + break; + } + } +} diff --git a/crypto_sign/falcon/falcon-512/avx2/vrfy.c b/crypto_sign/falcon/falcon-512/avx2/vrfy.c new file mode 100644 index 00000000..16fa9576 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/avx2/vrfy.c @@ -0,0 +1,853 @@ +#include "inner.h" + +/* + * Falcon signature verification. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ===================================================================== */ +/* + * Constants for NTT. + * + * n = 2^logn (2 <= n <= 1024) + * phi = X^n + 1 + * q = 12289 + * q0i = -1/q mod 2^16 + * R = 2^16 mod q + * R2 = 2^32 mod q + */ + +#define Q 12289 +#define Q0I 12287 +#define R 4091 +#define R2 10952 + +/* + * Table for NTT, binary case: + * GMb[x] = R*(g^rev(x)) mod q + * where g = 7 (it is a 2048-th primitive root of 1 modulo q) + * and rev() is the bit-reversal function over 10 bits. 
+ */ +static const uint16_t GMb[] = { + 4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, + 1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, + 1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, + 10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, + 12210, 6240, 997, 117, 4783, 4407, 1549, 7072, + 2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, + 12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, + 2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, + 5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, + 1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, + 3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, + 9277, 6130, 3323, 883, 10469, 489, 1502, 2851, + 11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, + 730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, + 9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, + 7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, + 3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, + 10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, + 6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, + 8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, + 6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, + 8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, + 5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, + 1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, + 737, 3698, 4699, 5753, 9046, 3687, 16, 914, + 5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, + 10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, + 6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, + 932, 10229, 8927, 7642, 351, 9298, 237, 5858, + 7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, + 4602, 1748, 11300, 340, 3711, 4614, 300, 10993, + 5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, + 3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, + 4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, + 6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, + 6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, + 8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, + 3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, + 7389, 8879, 10661, 11457, 4220, 
1432, 10832, 4328, + 8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, + 2523, 4339, 6115, 619, 937, 2834, 7775, 3279, + 2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, + 2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, + 12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, + 1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, + 11192, 315, 4511, 1158, 6061, 6751, 11865, 357, + 7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, + 4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, + 3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, + 5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, + 4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, + 7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, + 8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, + 6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, + 7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, + 10438, 9471, 1271, 408, 6911, 3079, 360, 8276, + 11535, 9156, 9049, 11539, 850, 8617, 784, 7919, + 8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, + 9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, + 6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, + 8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, + 8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, + 4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, + 2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, + 4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, + 11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, + 7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, + 11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, + 11736, 6813, 6979, 819, 8903, 6271, 10843, 348, + 7514, 8339, 6439, 694, 852, 5659, 2781, 3716, + 11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, + 2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, + 2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, + 10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, + 10923, 4918, 128, 7312, 725, 9157, 5006, 6393, + 3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, + 3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, + 5110, 45, 2400, 1921, 4377, 2720, 1695, 51, + 
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, + 4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, + 482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, + 11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, + 11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, + 8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, + 11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, + 862, 3158, 477, 7279, 5678, 7914, 4254, 302, + 2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, + 10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, + 5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, + 11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, + 1397, 10678, 103, 7420, 7976, 936, 764, 632, + 7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, + 6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, + 4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, + 7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, + 10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, + 2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, + 4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, + 8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, + 8192, 986, 7527, 1401, 870, 3615, 8465, 2756, + 9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, + 11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, + 2567, 708, 893, 6465, 4962, 10024, 2090, 5718, + 10743, 780, 4733, 4623, 2134, 2087, 4802, 884, + 5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, + 4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, + 6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, + 11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, + 8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, + 4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, + 2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, + 5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, + 11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, + 3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, + 7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, + 6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, + 10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, + 5371, 7274, 
11084, 290, 7864, 6827, 11822, 2509, + 6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, + 11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, + 7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, + 9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, + 7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, + 10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, + 162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, + 8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, + 5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, + 6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 +}; + +/* + * Table for inverse NTT, binary case: + * iGMb[x] = R*((1/g)^rev(x)) mod q + * Since g = 7, 1/g = 8778 mod 12289. + */ +static const uint16_t iGMb[] = { + 4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, + 2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, + 3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, + 5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, + 8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, + 4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, + 8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, + 5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, + 13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, + 9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, + 7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, + 11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, + 9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, + 6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, + 5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, + 4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, + 6635, 6543, 1582, 4868, 42, 673, 2240, 7219, + 1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, + 7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, + 6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, + 12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, + 6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, + 6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, + 11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, + 6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, + 9130, 9948, 
3566, 2133, 3901, 6038, 7333, 6609, + 3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, + 3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, + 5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, + 4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, + 10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, + 3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, + 5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, + 2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, + 3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, + 7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, + 10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, + 7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, + 6689, 386, 4462, 105, 2076, 10443, 119, 3955, + 4370, 11505, 3672, 11439, 750, 3240, 3133, 754, + 4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, + 4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, + 10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, + 728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, + 945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, + 4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, + 1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, + 101, 1911, 9483, 3608, 11997, 10536, 812, 8915, + 637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, + 3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, + 11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, + 2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, + 7769, 136, 617, 3157, 5889, 9219, 6855, 120, + 4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, + 11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, + 9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, + 6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, + 7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, + 10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, + 9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, + 4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, + 3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, + 3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, + 1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, + 2340, 7651, 1910, 10709, 2117, 9637, 
6402, 6028, + 2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, + 9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, + 6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, + 2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, + 5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, + 9956, 2702, 6656, 735, 2243, 11656, 833, 3107, + 6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, + 3513, 9769, 3025, 779, 9433, 3392, 7437, 668, + 10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, + 9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, + 5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, + 6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, + 7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, + 9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, + 707, 1088, 4936, 678, 10245, 18, 5684, 960, + 4459, 7957, 226, 2451, 6, 8874, 320, 6298, + 8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, + 9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, + 7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, + 5227, 952, 4319, 9810, 4356, 3088, 11118, 840, + 6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, + 4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, + 1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, + 11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, + 6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, + 2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, + 7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, + 9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, + 2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, + 12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, + 7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, + 9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, + 9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, + 439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, + 8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, + 10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, + 11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, + 11489, 8833, 2393, 15, 10830, 5003, 17, 565, + 5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, + 5840, 10482, 
8338, 6035, 6964, 1574, 10936, 2020, + 2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, + 11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, + 104, 6348, 9643, 6757, 12110, 5617, 10935, 541, + 135, 3041, 7200, 6526, 5085, 12136, 842, 4129, + 7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, + 1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, + 1770, 273, 8377, 2271, 5225, 10283, 116, 11807, + 91, 11699, 757, 1304, 7524, 6451, 8032, 8154, + 7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, + 12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, + 3924, 3188, 367, 2077, 336, 5384, 5631, 8596, + 4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, + 5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, + 12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, + 10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, + 9763, 12191, 459, 2966, 3166, 405, 5000, 9311, + 6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, + 8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, + 11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, + 9474, 2586, 1431, 2741, 473, 11383, 4745, 836, + 4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, + 5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, + 5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 +}; + +/* + * Reduce a small signed integer modulo q. The source integer MUST + * be between -q/2 and +q/2. + */ +static inline uint32_t +mq_conv_small(int x) { + /* + * If x < 0, the cast to uint32_t will set the high bit to 1. + */ + uint32_t y; + + y = (uint32_t)x; + y += Q & -(y >> 31); + return y; +} + +/* + * Addition modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_add(uint32_t x, uint32_t y) { + /* + * We compute x + y - q. If the result is negative, then the + * high bit will be set, and 'd >> 31' will be equal to 1; + * thus '-(d >> 31)' will be an all-one pattern. Otherwise, + * it will be an all-zero pattern. In other words, this + * implements a conditional addition of q. 
+ */ + uint32_t d; + + d = x + y - Q; + d += Q & -(d >> 31); + return d; +} + +/* + * Subtraction modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_sub(uint32_t x, uint32_t y) { + /* + * As in mq_add(), we use a conditional addition to ensure the + * result is in the 0..q-1 range. + */ + uint32_t d; + + d = x - y; + d += Q & -(d >> 31); + return d; +} + +/* + * Division by 2 modulo q. Operand must be in the 0..q-1 range. + */ +static inline uint32_t +mq_rshift1(uint32_t x) { + x += Q & -(x & 1); + return (x >> 1); +} + +/* + * Montgomery multiplication modulo q. If we set R = 2^16 mod q, then + * this function computes: x * y / R mod q + * Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_montymul(uint32_t x, uint32_t y) { + uint32_t z, w; + + /* + * We compute x*y + k*q with a value of k chosen so that the 16 + * low bits of the result are 0. We can then shift the value. + * After the shift, result may still be larger than q, but it + * will be lower than 2*q, so a conditional subtraction works. + */ + + z = x * y; + w = ((z * Q0I) & 0xFFFF) * Q; + + /* + * When adding z and w, the result will have its low 16 bits + * equal to 0. Since x, y and z are lower than q, the sum will + * be no more than (2^15 - 1) * q + (q - 1)^2, which will + * fit on 29 bits. + */ + z = (z + w) >> 16; + + /* + * After the shift, analysis shows that the value will be less + * than 2q. We do a subtraction then conditional subtraction to + * ensure the result is in the expected range. + */ + z -= Q; + z += Q & -(z >> 31); + return z; +} + +/* + * Montgomery squaring (computes (x^2)/R). + */ +static inline uint32_t +mq_montysqr(uint32_t x) { + return mq_montymul(x, x); +} + +/* + * Divide x by y modulo q = 12289. + */ +static inline uint32_t +mq_div_12289(uint32_t x, uint32_t y) { + /* + * We invert y by computing y^(q-2) mod q. 
+ * + * We use the following addition chain for exponent e = 12287: + * + * e0 = 1 + * e1 = 2 * e0 = 2 + * e2 = e1 + e0 = 3 + * e3 = e2 + e1 = 5 + * e4 = 2 * e3 = 10 + * e5 = 2 * e4 = 20 + * e6 = 2 * e5 = 40 + * e7 = 2 * e6 = 80 + * e8 = 2 * e7 = 160 + * e9 = e8 + e2 = 163 + * e10 = e9 + e8 = 323 + * e11 = 2 * e10 = 646 + * e12 = 2 * e11 = 1292 + * e13 = e12 + e9 = 1455 + * e14 = 2 * e13 = 2910 + * e15 = 2 * e14 = 5820 + * e16 = e15 + e10 = 6143 + * e17 = 2 * e16 = 12286 + * e18 = e17 + e0 = 12287 + * + * Additions on exponents are converted to Montgomery + * multiplications. We define all intermediate results as so + * many local variables, and let the C compiler work out which + * must be kept around. + */ + uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; + uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; + + y0 = mq_montymul(y, R2); + y1 = mq_montysqr(y0); + y2 = mq_montymul(y1, y0); + y3 = mq_montymul(y2, y1); + y4 = mq_montysqr(y3); + y5 = mq_montysqr(y4); + y6 = mq_montysqr(y5); + y7 = mq_montysqr(y6); + y8 = mq_montysqr(y7); + y9 = mq_montymul(y8, y2); + y10 = mq_montymul(y9, y8); + y11 = mq_montysqr(y10); + y12 = mq_montysqr(y11); + y13 = mq_montymul(y12, y9); + y14 = mq_montysqr(y13); + y15 = mq_montysqr(y14); + y16 = mq_montymul(y15, y10); + y17 = mq_montysqr(y16); + y18 = mq_montymul(y17, y0); + + /* + * Final multiplication with x, which is not in Montgomery + * representation, computes the correct division result. + */ + return mq_montymul(y18, x); +} + +/* + * Compute NTT on a ring element. 
+ */ +static void +mq_NTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, i, j1; + + ht = t >> 1; + for (i = 0, j1 = 0; i < m; i ++, j1 += t) { + size_t j, j2; + uint32_t s; + + s = GMb[m + i]; + j2 = j1 + ht; + for (j = j1; j < j2; j ++) { + uint32_t u, v; + + u = a[j]; + v = mq_montymul(a[j + ht], s); + a[j] = (uint16_t)mq_add(u, v); + a[j + ht] = (uint16_t)mq_sub(u, v); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT on a ring element, binary case. + */ +static void +mq_iNTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + uint32_t ni; + + n = (size_t)1 << logn; + t = 1; + m = n; + while (m > 1) { + size_t hm, dt, i, j1; + + hm = m >> 1; + dt = t << 1; + for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { + size_t j, j2; + uint32_t s; + + j2 = j1 + t; + s = iGMb[hm + i]; + for (j = j1; j < j2; j ++) { + uint32_t u, v, w; + + u = a[j]; + v = a[j + t]; + a[j] = (uint16_t)mq_add(u, v); + w = mq_sub(u, v); + a[j + t] = (uint16_t) + mq_montymul(w, s); + } + } + t = dt; + m = hm; + } + + /* + * To complete the inverse NTT, we must now divide all values by + * n (the vector size). We thus need the inverse of n, i.e. we + * need to divide 1 by 2 logn times. But we also want it in + * Montgomery representation, i.e. we also want to multiply it + * by R = 2^16. In the common case, this should be a simple right + * shift. The loop below is generic and works also in corner cases; + * its computation time is negligible. + */ + ni = R; + for (m = n; m > 1; m >>= 1) { + ni = mq_rshift1(ni); + } + for (m = 0; m < n; m ++) { + a[m] = (uint16_t)mq_montymul(a[m], ni); + } +} + +/* + * Convert a polynomial (mod q) to Montgomery representation. 
+ */ +static void +mq_poly_tomonty(uint16_t *f, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], R2); + } +} + +/* + * Multiply two polynomials together (NTT representation, and using + * a Montgomery multiplication). Result f*g is written over f. + */ +static void +mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], g[u]); + } +} + +/* + * Subtract polynomial g from polynomial f. + */ +static void +mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_sub(f[u], g[u]); + } +} + +/* ===================================================================== */ + +/* see inner.h */ +void +PQCLEAN_FALCON512_AVX2_to_ntt_monty(uint16_t *h, unsigned logn) { + mq_NTT(h, logn); + mq_poly_tomonty(h, logn); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + + /* + * Reduce s2 elements modulo q ([0..q-1] range). + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + + /* + * Compute -s1 = s2*h - c0 mod phi mod q (in tt[]). + */ + mq_NTT(tt, logn); + mq_poly_montymul_ntt(tt, h, logn); + mq_iNTT(tt, logn); + mq_poly_sub(tt, c0, logn); + + /* + * Normalize -s1 elements into the [-q/2..q/2] range. + */ + for (u = 0; u < n; u ++) { + int32_t w; + + w = (int32_t)tt[u]; + w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); + ((int16_t *)tt)[u] = (int16_t)w; + } + + /* + * Signature is valid if and only if the aggregate (-s1,s2) vector + * is short enough. 
+ */ + return PQCLEAN_FALCON512_AVX2_is_short((int16_t *)tt, s2, logn); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + tt[u] = (uint16_t)mq_conv_small(f[u]); + h[u] = (uint16_t)mq_conv_small(g[u]); + } + mq_NTT(h, logn); + mq_NTT(tt, logn); + for (u = 0; u < n; u ++) { + if (tt[u] == 0) { + return 0; + } + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + return 1; +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *t1, *t2; + + n = (size_t)1 << logn; + t1 = (uint16_t *)tmp; + t2 = t1 + n; + for (u = 0; u < n; u ++) { + t1[u] = (uint16_t)mq_conv_small(g[u]); + t2[u] = (uint16_t)mq_conv_small(F[u]); + } + mq_NTT(t1, logn); + mq_NTT(t2, logn); + mq_poly_tomonty(t1, logn); + mq_poly_montymul_ntt(t1, t2, logn); + for (u = 0; u < n; u ++) { + t2[u] = (uint16_t)mq_conv_small(f[u]); + } + mq_NTT(t2, logn); + for (u = 0; u < n; u ++) { + if (t2[u] == 0) { + return 0; + } + t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); + } + mq_iNTT(t1, logn); + for (u = 0; u < n; u ++) { + uint32_t w; + int32_t gi; + + w = t1[u]; + w -= (Q & ~ -((w - (Q >> 1)) >> 31)); + gi = *(int32_t *)&w; + if (gi < -127 || gi > +127) { + return 0; + } + G[u] = (int8_t)gi; + } + return 1; +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + mq_NTT(tt, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + } + return (int)(1u - (r >> 
31)); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + + /* + * Reduce elements of s1 and s2 modulo q; then write s2 into tt[] + * and c0 - s1 into h[]. + */ + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + + w = (uint32_t)s1[u]; + w += Q & -(w >> 31); + w = mq_sub(c0[u], w); + h[u] = (uint16_t)w; + } + + /* + * Compute h = (c0 - s1) / s2. If one of the coefficients of s2 + * is zero (in NTT representation) then the operation fails. We + * keep that information into a flag so that we do not deviate + * from strict constant-time processing; if all coefficients of + * s2 are non-zero, then the high bit of r will be zero. + */ + mq_NTT(tt, logn); + mq_NTT(h, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + + /* + * Signature is acceptable if and only if it is short enough, + * and s2 was invertible mod phi mod q. The caller must still + * check that the rebuilt public key matches the expected + * value (e.g. through a hash). 
+ */ + r = ~r & (uint32_t) - PQCLEAN_FALCON512_AVX2_is_short(s1, s2, logn); + return (int)(r >> 31); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_AVX2_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) { + uint16_t *s2; + size_t u, n; + uint32_t r; + + n = (size_t)1 << logn; + s2 = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)sig[u]; + w += Q & -(w >> 31); + s2[u] = (uint16_t)w; + } + mq_NTT(s2, logn); + r = 0; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u] - 1u; + r += (w >> 31); + } + return (int)r; +} diff --git a/crypto_sign/falcon/falcon-512/clean/LICENSE b/crypto_sign/falcon/falcon-512/clean/LICENSE new file mode 100644 index 00000000..12c7b56c --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/LICENSE @@ -0,0 +1,24 @@ +\ +MIT License + +Copyright (c) 2017-2019 Falcon Project + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/crypto_sign/falcon/falcon-512/clean/Makefile.Microsoft_nmake b/crypto_sign/falcon/falcon-512/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..af9621a5 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/Makefile.Microsoft_nmake @@ -0,0 +1,23 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libfalcon-512_clean.lib +OBJECTS=codec.obj common.obj fft.obj fpr.obj keygen.obj pqclean.obj rng.obj sign.obj vrfy.obj + +# Warning C4146 is raised when a unary minus operator is applied to an +# unsigned type; this has nonetheless been standard and portable for as +# long as there has been a C standard, and we do that a lot, especially +# for constant-time computations. Thus, we disable that spurious warning. +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /wd4146 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/falcon/falcon-512/clean/api.h b/crypto_sign/falcon/falcon-512/clean/api.h new file mode 100644 index 00000000..a2e5e1d5 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/api.h @@ -0,0 +1,80 @@ +#ifndef PQCLEAN_FALCON512_CLEAN_API_H +#define PQCLEAN_FALCON512_CLEAN_API_H + +#include +#include + +#define PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES 1281 +#define PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES 897 +#define PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES 690 + +#define PQCLEAN_FALCON512_CLEAN_CRYPTO_ALGNAME "Falcon-512" + +/* + * Generate a new key pair. Public key goes into pk[], private key in sk[]. + * Key sizes are exact (in bytes): + * public (pk): PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES + * private (sk): PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES + * + * Return value: 0 on success, -1 on error. 
+ */ +int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/* + * Compute a signature on a provided message (m, mlen), with a given + * private key (sk). Signature is written in sig[], with length written + * into *siglen. Signature length is variable; maximum signature length + * (in bytes) is PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES. + * + * sig[], m[] and sk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Verify a signature (sig, siglen) on a message (m, mlen) with a given + * public key (pk). + * + * sig[], m[] and pk[] may overlap each other arbitrarily. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/* + * Compute a signature on a message and pack the signature and message + * into a single object, written into sm[]. The length of that output is + * written in *smlen; that length may be larger than the message length + * (mlen) by up to PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES. + * + * sm[] and m[] may overlap each other arbitrarily; however, sm[] shall + * not overlap with sk[]. + * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/* + * Open a signed message object (sm, smlen) and verify the signature; + * on success, the message itself is written into m[] and its length + * into *mlen. The message is shorter than the signed message object, + * but the size difference depends on the signature value; the difference + * may range up to PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES. + * + * m[], sm[] and pk[] may overlap each other arbitrarily. 
+ * + * Return value: 0 on success, -1 on error. + */ +int PQCLEAN_FALCON512_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/falcon/falcon-512/clean/codec.c b/crypto_sign/falcon/falcon-512/clean/codec.c new file mode 100644 index 00000000..76709bc9 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/codec.c @@ -0,0 +1,555 @@ +#include "inner.h" + +/* + * Encoding/decoding of keys and signatures. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_modq_encode( + void *out, size_t max_out_len, + const uint16_t *x, unsigned logn) { + size_t n, out_len, u; + uint8_t *buf; + uint32_t acc; + int acc_len; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + if (x[u] >= 12289) { + return 0; + } + } + out_len = ((n * 14) + 7) >> 3; + if (out == NULL) { + return out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + for (u = 0; u < n; u ++) { + acc = (acc << 14) | x[u]; + acc_len += 14; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_modq_decode( + uint16_t *x, unsigned logn, + const void *in, size_t max_in_len) { + size_t n, in_len, u; + const uint8_t *buf; + uint32_t acc; + int acc_len; + + n = (size_t)1 << logn; + in_len = ((n * 14) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + acc = 0; + acc_len = 0; + u = 0; + while (u < n) { + acc = (acc << 8) | (*buf ++); + acc_len += 8; + if (acc_len >= 14) { + unsigned w; + + acc_len -= 14; + w = (acc >> acc_len) & 0x3FFF; + if (w >= 12289) { + return 0; + } + x[u ++] = (uint16_t)w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_trim_i16_encode( + void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits) { + size_t n, u, out_len; + int minv, maxv; + uint8_t *buf; + uint32_t acc, mask; + unsigned acc_len; + + n = (size_t)1 << logn; + maxv = (1 << (bits - 1)) - 1; + minv = -maxv; + for (u = 0; u < n; u ++) { + if (x[u] < minv || x[u] > maxv) { + return 0; + } + } + out_len = ((n * bits) + 7) >> 3; + if (out == NULL) { + return 
out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + mask = ((uint32_t)1 << bits) - 1; + for (u = 0; u < n; u ++) { + acc = (acc << bits) | ((uint16_t)x[u] & mask); + acc_len += bits; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf ++ = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_trim_i16_decode( + int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len) { + size_t n, in_len; + const uint8_t *buf; + size_t u; + uint32_t acc, mask1, mask2; + unsigned acc_len; + + n = (size_t)1 << logn; + in_len = ((n * bits) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + u = 0; + acc = 0; + acc_len = 0; + mask1 = ((uint32_t)1 << bits) - 1; + mask2 = (uint32_t)1 << (bits - 1); + while (u < n) { + acc = (acc << 8) | *buf ++; + acc_len += 8; + while (acc_len >= bits && u < n) { + uint32_t w; + + acc_len -= bits; + w = (acc >> acc_len) & mask1; + w |= -(w & mask2); + if (w == -mask2) { + /* + * The -2^(bits-1) value is forbidden. + */ + return 0; + } + w |= -(w & mask2); + x[u ++] = (int16_t) * (int32_t *)&w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + /* + * Extra bits in the last byte must be zero. 
+ */ + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_trim_i8_encode( + void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits) { + size_t n, u, out_len; + int minv, maxv; + uint8_t *buf; + uint32_t acc, mask; + unsigned acc_len; + + n = (size_t)1 << logn; + maxv = (1 << (bits - 1)) - 1; + minv = -maxv; + for (u = 0; u < n; u ++) { + if (x[u] < minv || x[u] > maxv) { + return 0; + } + } + out_len = ((n * bits) + 7) >> 3; + if (out == NULL) { + return out_len; + } + if (out_len > max_out_len) { + return 0; + } + buf = out; + acc = 0; + acc_len = 0; + mask = ((uint32_t)1 << bits) - 1; + for (u = 0; u < n; u ++) { + acc = (acc << bits) | ((uint8_t)x[u] & mask); + acc_len += bits; + while (acc_len >= 8) { + acc_len -= 8; + *buf ++ = (uint8_t)(acc >> acc_len); + } + } + if (acc_len > 0) { + *buf ++ = (uint8_t)(acc << (8 - acc_len)); + } + return out_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_trim_i8_decode( + int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len) { + size_t n, in_len; + const uint8_t *buf; + size_t u; + uint32_t acc, mask1, mask2; + unsigned acc_len; + + n = (size_t)1 << logn; + in_len = ((n * bits) + 7) >> 3; + if (in_len > max_in_len) { + return 0; + } + buf = in; + u = 0; + acc = 0; + acc_len = 0; + mask1 = ((uint32_t)1 << bits) - 1; + mask2 = (uint32_t)1 << (bits - 1); + while (u < n) { + acc = (acc << 8) | *buf ++; + acc_len += 8; + while (acc_len >= bits && u < n) { + uint32_t w; + + acc_len -= bits; + w = (acc >> acc_len) & mask1; + w |= -(w & mask2); + if (w == -mask2) { + /* + * The -2^(bits-1) value is forbidden. + */ + return 0; + } + x[u ++] = (int8_t) * (int32_t *)&w; + } + } + if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) { + /* + * Extra bits in the last byte must be zero. 
+ */ + return 0; + } + return in_len; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_comp_encode( + void *out, size_t max_out_len, + const int16_t *x, unsigned logn) { + uint8_t *buf; + size_t n, u, v; + uint32_t acc; + unsigned acc_len; + + n = (size_t)1 << logn; + buf = out; + + /* + * Make sure that all values are within the -2047..+2047 range. + */ + for (u = 0; u < n; u ++) { + if (x[u] < -2047 || x[u] > +2047) { + return 0; + } + } + + acc = 0; + acc_len = 0; + v = 0; + for (u = 0; u < n; u ++) { + int t; + unsigned w; + + /* + * Get sign and absolute value of next integer; push the + * sign bit. + */ + acc <<= 1; + t = x[u]; + if (t < 0) { + t = -t; + acc |= 1; + } + w = (unsigned)t; + + /* + * Push the low 7 bits of the absolute value. + */ + acc <<= 7; + acc |= w & 127u; + w >>= 7; + + /* + * We pushed exactly 8 bits. + */ + acc_len += 8; + + /* + * Push as many zeros as necessary, then a one. Since the + * absolute value is at most 2047, w can only range up to + * 15 at this point, thus we will add at most 16 bits + * here. With the 8 bits above and possibly up to 7 bits + * from previous iterations, we may go up to 31 bits, which + * will fit in the accumulator, which is an uint32_t. + */ + acc <<= (w + 1); + acc |= 1; + acc_len += w + 1; + + /* + * Produce all full bytes. + */ + while (acc_len >= 8) { + acc_len -= 8; + if (buf != NULL) { + if (v >= max_out_len) { + return 0; + } + buf[v] = (uint8_t)(acc >> acc_len); + } + v ++; + } + } + + /* + * Flush remaining bits (if any). 
+ */ + if (acc_len > 0) { + if (buf != NULL) { + if (v >= max_out_len) { + return 0; + } + buf[v] = (uint8_t)(acc << (8 - acc_len)); + } + v ++; + } + + return v; +} + +/* see inner.h */ +size_t +PQCLEAN_FALCON512_CLEAN_comp_decode( + int16_t *x, unsigned logn, + const void *in, size_t max_in_len) { + const uint8_t *buf; + size_t n, u, v; + uint32_t acc; + unsigned acc_len; + + n = (size_t)1 << logn; + buf = in; + acc = 0; + acc_len = 0; + v = 0; + for (u = 0; u < n; u ++) { + unsigned b, s, m; + + /* + * Get next eight bits: sign and low seven bits of the + * absolute value. + */ + if (v >= max_in_len) { + return 0; + } + acc = (acc << 8) | (uint32_t)buf[v ++]; + b = acc >> acc_len; + s = b & 128; + m = b & 127; + + /* + * Get next bits until a 1 is reached. + */ + for (;;) { + if (acc_len == 0) { + if (v >= max_in_len) { + return 0; + } + acc = (acc << 8) | (uint32_t)buf[v ++]; + acc_len = 8; + } + acc_len --; + if (((acc >> acc_len) & 1) != 0) { + break; + } + m += 128; + if (m > 2047) { + return 0; + } + } + x[u] = (int16_t) m; + if (s) { + x[u] = (int16_t) - x[u]; + } + } + return v; +} + +/* + * Key elements and signatures are polynomials with small integer + * coefficients. Here are some statistics gathered over many + * generated key pairs (10000 or more for each degree): + * + * log(n) n max(f,g) std(f,g) max(F,G) std(F,G) + * 1 2 129 56.31 143 60.02 + * 2 4 123 40.93 160 46.52 + * 3 8 97 28.97 159 38.01 + * 4 16 100 21.48 154 32.50 + * 5 32 71 15.41 151 29.36 + * 6 64 59 11.07 138 27.77 + * 7 128 39 7.91 144 27.00 + * 8 256 32 5.63 148 26.61 + * 9 512 22 4.00 137 26.46 + * 10 1024 15 2.84 146 26.41 + * + * We want a compact storage format for private key, and, as part of + * key generation, we are allowed to reject some keys which would + * otherwise be fine (this does not induce any noticeable vulnerability + * as long as we reject only a small proportion of possible keys). 
+ * Hence, we enforce at key generation time maximum values for the + * elements of f, g, F and G, so that their encoding can be expressed + * in fixed-width values. Limits have been chosen so that generated + * keys are almost always within bounds, thus not impacting neither + * security or performance. + * + * IMPORTANT: the code assumes that all coefficients of f, g, F and G + * ultimately fit in the -127..+127 range. Thus, none of the elements + * of max_fg_bits[] and max_FG_bits[] shall be greater than 8. + */ + +const uint8_t PQCLEAN_FALCON512_CLEAN_max_fg_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 7, + 7, + 6, + 6, + 5 +}; + +const uint8_t PQCLEAN_FALCON512_CLEAN_max_FG_bits[] = { + 0, /* unused */ + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8 +}; + +/* + * When generating a new key pair, we can always reject keys which + * feature an abnormally large coefficient. This can also be done for + * signatures, albeit with some care: in case the signature process is + * used in a derandomized setup (explicitly seeded with the message and + * private key), we have to follow the specification faithfully, and the + * specification only enforces a limit on the L2 norm of the signature + * vector. The limit on the L2 norm implies that the absolute value of + * a coefficient of the signature cannot be more than the following: + * + * log(n) n max sig coeff (theoretical) + * 1 2 412 + * 2 4 583 + * 3 8 824 + * 4 16 1166 + * 5 32 1649 + * 6 64 2332 + * 7 128 3299 + * 8 256 4665 + * 9 512 6598 + * 10 1024 9331 + * + * However, the largest observed signature coefficients during our + * experiments was 1077 (in absolute value), hence we can assume that, + * with overwhelming probability, signature coefficients will fit + * in -2047..2047, i.e. 12 bits. 
+ */ + +const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[] = { + 0, /* unused */ + 10, + 11, + 11, + 12, + 12, + 12, + 12, + 12, + 12, + 12 +}; diff --git a/crypto_sign/falcon/falcon-512/clean/common.c b/crypto_sign/falcon/falcon-512/clean/common.c new file mode 100644 index 00000000..dea433f6 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/common.c @@ -0,0 +1,294 @@ +#include "inner.h" + +/* + * Support functions for signatures (hash-to-point, norm). + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_hash_to_point_vartime( + inner_shake256_context *sc, + uint16_t *x, unsigned logn) { + /* + * This is the straightforward per-the-spec implementation. It + * is not constant-time, thus it might reveal information on the + * plaintext (at least, enough to check the plaintext against a + * list of potential plaintexts) in a scenario where the + * attacker does not have access to the signature value or to + * the public key, but knows the nonce (without knowledge of the + * nonce, the hashed output cannot be matched against potential + * plaintexts). + */ + size_t n; + + n = (size_t)1 << logn; + while (n > 0) { + uint8_t buf[2]; + uint32_t w; + + inner_shake256_extract(sc, (void *)buf, sizeof buf); + w = ((unsigned)buf[0] << 8) | (unsigned)buf[1]; + if (w < 61445) { + while (w >= 12289) { + w -= 12289; + } + *x ++ = (uint16_t)w; + n --; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_hash_to_point_ct( + inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp) { + /* + * Each 16-bit sample is a value in 0..65535. The value is + * kept if it falls in 0..61444 (because 61445 = 5*12289) + * and rejected otherwise; thus, each sample has probability + * about 0.93758 of being selected. + * + * We want to oversample enough to be sure that we will + * have enough values with probability at least 1 - 2^(-256). + * Depending on degree N, this leads to the following + * required oversampling: + * + * logn n oversampling + * 1 2 65 + * 2 4 67 + * 3 8 71 + * 4 16 77 + * 5 32 86 + * 6 64 100 + * 7 128 122 + * 8 256 154 + * 9 512 205 + * 10 1024 287 + * + * If logn >= 7, then the provided temporary buffer is large + * enough. Otherwise, we use a stack buffer of 63 entries + * (i.e. 126 bytes) for the values that do not fit in tmp[]. 
+ */ + + static const uint16_t overtab[] = { + 0, /* unused */ + 65, + 67, + 71, + 77, + 86, + 100, + 122, + 154, + 205, + 287 + }; + + unsigned n, n2, u, m, p, over; + uint16_t *tt1, tt2[63]; + + /* + * We first generate m 16-bit value. Values 0..n-1 go to x[]. + * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. + * We also reduce modulo q the values; rejected values are set + * to 0xFFFF. + */ + n = 1U << logn; + n2 = n << 1; + over = overtab[logn]; + m = n + over; + tt1 = (uint16_t *)tmp; + for (u = 0; u < m; u ++) { + uint8_t buf[2]; + uint32_t w, wr; + + inner_shake256_extract(sc, buf, sizeof buf); + w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; + wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); + wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); + wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); + wr |= ((w - 61445) >> 31) - 1; + if (u < n) { + x[u] = (uint16_t)wr; + } else if (u < n2) { + tt1[u - n] = (uint16_t)wr; + } else { + tt2[u - n2] = (uint16_t)wr; + } + } + + /* + * Now we must "squeeze out" the invalid values. We do this in + * a logarithmic sequence of passes; each pass computes where a + * value should go, and moves it down by 'p' slots if necessary, + * where 'p' uses an increasing powers-of-two scale. It can be + * shown that in all cases where the loop decides that a value + * has to be moved down by p slots, the destination slot is + * "free" (i.e. contains an invalid value). + */ + for (p = 1; p <= over; p <<= 1) { + unsigned v; + + /* + * In the loop below: + * + * - v contains the index of the final destination of + * the value; it is recomputed dynamically based on + * whether values are valid or not. + * + * - u is the index of the value we consider ("source"); + * its address is s. + * + * - The loop may swap the value with the one at index + * u-p. The address of the swap destination is d. 
+ */ + v = 0; + for (u = 0; u < m; u ++) { + uint16_t *s, *d; + unsigned j, sv, dv, mk; + + if (u < n) { + s = &x[u]; + } else if (u < n2) { + s = &tt1[u - n]; + } else { + s = &tt2[u - n2]; + } + sv = *s; + + /* + * The value in sv should ultimately go to + * address v, i.e. jump back by u-v slots. + */ + j = u - v; + + /* + * We increment v for the next iteration, but + * only if the source value is valid. The mask + * 'mk' is -1 if the value is valid, 0 otherwise, + * so we _subtract_ mk. + */ + mk = (sv >> 15) - 1U; + v -= mk; + + /* + * In this loop we consider jumps by p slots; if + * u < p then there is nothing more to do. + */ + if (u < p) { + continue; + } + + /* + * Destination for the swap: value at address u-p. + */ + if ((u - p) < n) { + d = &x[u - p]; + } else if ((u - p) < n2) { + d = &tt1[(u - p) - n]; + } else { + d = &tt2[(u - p) - n2]; + } + dv = *d; + + /* + * The swap should be performed only if the source + * is valid AND the jump j has its 'p' bit set. + */ + mk &= -(((j & p) + 0x1FF) >> 9); + + *s = (uint16_t)(sv ^ (mk & (sv ^ dv))); + *d = (uint16_t)(dv ^ (mk & (sv ^ dv))); + } + } +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_is_short( + const int16_t *s1, const int16_t *s2, unsigned logn) { + /* + * We use the l2-norm. Code below uses only 32-bit operations to + * compute the square of the norm with saturation to 2^32-1 if + * the value exceeds 2^31-1. + */ + size_t n, u; + uint32_t s, ng; + + n = (size_t)1 << logn; + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = s1[u]; + s += (uint32_t)(z * z); + ng |= s; + z = s2[u]; + s += (uint32_t)(z * z); + ng |= s; + } + s |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). 
+ */ + return s < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_is_short_half( + uint32_t sqn, const int16_t *s2, unsigned logn) { + size_t n, u; + uint32_t ng; + + n = (size_t)1 << logn; + ng = -(sqn >> 31); + for (u = 0; u < n; u ++) { + int32_t z; + + z = s2[u]; + sqn += (uint32_t)(z * z); + ng |= sqn; + } + sqn |= -(ng >> 31); + + /* + * Acceptance bound on the l2-norm is: + * 1.2*1.55*sqrt(q)*sqrt(2*N) + * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024). + */ + return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn)); +} diff --git a/crypto_sign/falcon/falcon-512/clean/fft.c b/crypto_sign/falcon/falcon-512/clean/fft.c new file mode 100644 index 00000000..a7d9bdad --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/fft.c @@ -0,0 +1,700 @@ +#include "inner.h" + +/* + * FFT code. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* + * Rules for complex number macros: + * -------------------------------- + * + * Operand order is: destination, source1, source2... + * + * Each operand is a real and an imaginary part. + * + * All overlaps are allowed. + */ + +/* + * Addition of two complex numbers (d = a + b). + */ +#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_add(a_re, b_re); \ + fpct_im = fpr_add(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Subtraction of two complex numbers (d = a - b). + */ +#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_re, fpct_im; \ + fpct_re = fpr_sub(a_re, b_re); \ + fpct_im = fpr_sub(a_im, b_im); \ + (d_re) = fpct_re; \ + (d_im) = fpct_im; \ + } while (0) + +/* + * Multplication of two complex numbers (d = a * b). + */ +#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Squaring of a complex number (d = a * a). 
+ */ +#define FPC_SQR(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_d_re = fpr_sub(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_d_im = fpr_double(fpr_mul(fpct_a_re, fpct_a_im)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Inversion of a complex number (d = 1 / a). + */ +#define FPC_INV(d_re, d_im, a_re, a_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_d_re = fpr_mul(fpct_a_re, fpct_m); \ + fpct_d_im = fpr_mul(fpr_neg(fpct_a_im), fpct_m); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Division of complex numbers (d = a / b). + */ +#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im) do { \ + fpr fpct_a_re, fpct_a_im; \ + fpr fpct_b_re, fpct_b_im; \ + fpr fpct_d_re, fpct_d_im; \ + fpr fpct_m; \ + fpct_a_re = (a_re); \ + fpct_a_im = (a_im); \ + fpct_b_re = (b_re); \ + fpct_b_im = (b_im); \ + fpct_m = fpr_add(fpr_sqr(fpct_b_re), fpr_sqr(fpct_b_im)); \ + fpct_m = fpr_inv(fpct_m); \ + fpct_b_re = fpr_mul(fpct_b_re, fpct_m); \ + fpct_b_im = fpr_mul(fpr_neg(fpct_b_im), fpct_m); \ + fpct_d_re = fpr_sub( \ + fpr_mul(fpct_a_re, fpct_b_re), \ + fpr_mul(fpct_a_im, fpct_b_im)); \ + fpct_d_im = fpr_add( \ + fpr_mul(fpct_a_re, fpct_b_im), \ + fpr_mul(fpct_a_im, fpct_b_re)); \ + (d_re) = fpct_d_re; \ + (d_im) = fpct_d_im; \ + } while (0) + +/* + * Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the + * values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots + * of X^N+1 in the field of complex numbers. A crucial property is that + * w_{N-1-j} = conj(w_j) = 1/w_j for all j. + * + * FFT representation of a polynomial f (taken modulo X^N+1) is the + * set of values f(w_j). 
Since f is real, conj(f(w_j)) = f(conj(w_j)), + * thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, + * for j = 0 to N/2-1; the other half can be recomputed easily when (if) + * needed. A consequence is that FFT representation has the same size + * as normal representation: N/2 complex numbers use N real numbers (each + * complex number is the combination of a real and an imaginary part). + * + * We use a specific ordering which makes computations easier. Let rev() + * be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we + * store the real and imaginary parts of f(w_j) in slots: + * + * Re(f(w_j)) -> slot rev(j)/2 + * Im(f(w_j)) -> slot rev(j)/2+N/2 + * + * (Note that rev(j) is even for j < N/2.) + */ + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_FFT(fpr *f, unsigned logn) { + /* + * FFT algorithm in bit-reversal order uses the following + * iterative algorithm: + * + * t = N + * for m = 1; m < N; m *= 2: + * ht = t/2 + * for i1 = 0; i1 < m; i1 ++: + * j1 = i1 * t + * s = GM[m + i1] + * for j = j1; j < (j1 + ht); j ++: + * x = f[j] + * y = s * f[j + ht] + * f[j] = x + y + * f[j + ht] = x - y + * t = ht + * + * GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). + * + * In the description above, f[] is supposed to contain complex + * numbers. In our in-memory representation, the real and + * imaginary parts of f[k] are in array slots k and k+N/2. + * + * We only keep the first half of the complex numbers. We can + * see that after the first iteration, the first and second halves + * of the array of complex numbers have separate lives, so we + * simply ignore the second part. + */ + + unsigned u; + size_t t, n, hn, m; + + /* + * First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 + * (because GM[1] = w^rev(1) = w^(N/2) = i). + * In our chosen representation, this is a no-op: everything is + * already where it should be. 
+ */ + + /* + * Subsequent iterations are truncated to use only the first + * half of values. + */ + n = (size_t)1 << logn; + hn = n >> 1; + t = hn; + for (u = 1, m = 2; u < logn; u ++, m <<= 1) { + size_t ht, hm, i1, j1; + + ht = t >> 1; + hm = m >> 1; + for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { + size_t j, j2; + + j2 = j1 + ht; + fpr s_re, s_im; + + s_re = fpr_gm_tab[((m + i1) << 1) + 0]; + s_im = fpr_gm_tab[((m + i1) << 1) + 1]; + for (j = j1; j < j2; j ++) { + fpr x_re, x_im, y_re, y_im; + + x_re = f[j]; + x_im = f[j + hn]; + y_re = f[j + ht]; + y_im = f[j + ht + hn]; + FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im); + FPC_ADD(f[j], f[j + hn], + x_re, x_im, y_re, y_im); + FPC_SUB(f[j + ht], f[j + ht + hn], + x_re, x_im, y_re, y_im); + } + } + t = ht; + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_iFFT(fpr *f, unsigned logn) { + /* + * Inverse FFT algorithm in bit-reversal order uses the following + * iterative algorithm: + * + * t = 1 + * for m = N; m > 1; m /= 2: + * hm = m/2 + * dt = t*2 + * for i1 = 0; i1 < hm; i1 ++: + * j1 = i1 * dt + * s = iGM[hm + i1] + * for j = j1; j < (j1 + t); j ++: + * x = f[j] + * y = f[j + t] + * f[j] = x + y + * f[j + t] = s * (x - y) + * t = dt + * for i1 = 0; i1 < N; i1 ++: + * f[i1] = f[i1] / N + * + * iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) + * (actually, iGM[k] = 1/GM[k] = conj(GM[k])). + * + * In the main loop (not counting the final division loop), in + * all iterations except the last, the first and second half of f[] + * (as an array of complex numbers) are separate. In our chosen + * representation, we do not keep the second half. + * + * The last iteration recombines the recomputed half with the + * implicit half, and should yield only real numbers since the + * target polynomial is real; moreover, s = i at that step. 
+ * Thus, when considering x and y: + * y = conj(x) since the final f[j] must be real + * Therefore, f[j] is filled with 2*Re(x), and f[j + t] is + * filled with 2*Im(x). + * But we already have Re(x) and Im(x) in array slots j and j+t + * in our chosen representation. That last iteration is thus a + * simple doubling of the values in all the array. + * + * We make the last iteration a no-op by tweaking the final + * division into a division by N/2, not N. + */ + size_t u, n, hn, t, m; + + n = (size_t)1 << logn; + t = 1; + m = n; + hn = n >> 1; + for (u = logn; u > 1; u --) { + size_t hm, dt, i1, j1; + + hm = m >> 1; + dt = t << 1; + for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { + size_t j, j2; + + j2 = j1 + t; + fpr s_re, s_im; + + s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; + s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); + for (j = j1; j < j2; j ++) { + fpr x_re, x_im, y_re, y_im; + + x_re = f[j]; + x_im = f[j + hn]; + y_re = f[j + t]; + y_im = f[j + t + hn]; + FPC_ADD(f[j], f[j + hn], + x_re, x_im, y_re, y_im); + FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im); + FPC_MUL(f[j + t], f[j + t + hn], + x_re, x_im, s_re, s_im); + } + } + t = dt; + m = hm; + } + + /* + * Last iteration is a no-op, provided that we divide by N/2 + * instead of N. We need to make a special case for logn = 0. 
+ */ + if (logn > 0) { + fpr ni; + + ni = fpr_p2_tab[logn]; + for (u = 0; u < n; u ++) { + f[u] = fpr_mul(f[u], ni); + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_add( + fpr *a, const fpr *b, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + a[u] = fpr_add(a[u], b[u]); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_sub( + fpr *a, const fpr *b, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + a[u] = fpr_sub(a[u], b[u]); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_neg(fpr *a, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + a[u] = fpr_neg(a[u]); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_adj_fft(fpr *a, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + for (u = (n >> 1); u < n; u ++) { + a[u] = fpr_neg(a[u]); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_mul_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_muladj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = fpr_neg(b[u + hn]); + FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) { + /* + * Since each coefficient is multiplied with its own conjugate, + * the result contains only real values. 
+ */ + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr a_re, a_im; + + a_re = a[u]; + a_im = a[u + hn]; + a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); + a[u + hn] = fpr_zero; + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) { + size_t n, u; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + a[u] = fpr_mul(a[u], x); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_div_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr a_re, a_im, b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr a_re, a_im; + fpr b_re, b_im; + + a_re = a[u]; + a_im = a[u + hn]; + b_re = b[u]; + b_im = b[u + hn]; + d[u] = fpr_inv(fpr_add( + fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), + fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr F_re, F_im, G_re, G_im; + fpr f_re, f_im, g_re, g_im; + fpr a_re, a_im, b_re, b_im; + + F_re = F[u]; + F_im = F[u + hn]; + G_re = G[u]; + G_im = G[u + hn]; + f_re = f[u]; + f_im = f[u + hn]; + g_re = g[u]; + g_im = g[u + hn]; + + FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); + FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); + d[u] = fpr_add(a_re, b_re); + d[u + hn] = fpr_add(a_im, b_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft( + fpr *a, const fpr *b, 
unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + a[u] = fpr_mul(a[u], b[u]); + a[u + hn] = fpr_mul(a[u + hn], b[u]); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft( + fpr *a, const fpr *b, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr ib; + + ib = fpr_inv(b[u]); + a[u] = fpr_mul(a[u], ib); + a[u + hn] = fpr_mul(a[u + hn], ib); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_LDL_fft( + const fpr *g00, + fpr *g01, fpr *g11, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + fpr mu_re, mu_im; + + g00_re = g00[u]; + g00_im = g00[u + hn]; + g01_re = g01[u]; + g01_im = g01[u + hn]; + g11_re = g11[u]; + g11_im = g11[u + hn]; + FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); + FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); + FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im); + g01[u] = mu_re; + g01[u + hn] = fpr_neg(mu_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft( + fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn) { + size_t n, hn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + for (u = 0; u < hn; u ++) { + fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; + fpr mu_re, mu_im; + + g00_re = g00[u]; + g00_im = g00[u + hn]; + g01_re = g01[u]; + g01_im = g01[u + hn]; + g11_re = g11[u]; + g11_im = g11[u + hn]; + FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); + FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); + FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im); + l10[u] = mu_re; + l10[u + hn] = fpr_neg(mu_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_split_fft( + fpr *f0, fpr *f1, + const fpr *f, unsigned logn) { + /* + * The FFT 
representation we use is in bit-reversed order + * (element i contains f(w^(rev(i))), where rev() is the + * bit-reversal function over the ring degree. This changes + * indexes with regards to the Falcon specification. + */ + size_t n, hn, qn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + qn = hn >> 1; + + /* + * We process complex values by pairs. For logn = 1, there is only + * one complex value (the other one is the implicit conjugate), + * so we add the two lines below because the loop will be + * skipped. + */ + f0[0] = f[0]; + f1[0] = f[hn]; + + for (u = 0; u < qn; u ++) { + fpr a_re, a_im, b_re, b_im; + fpr t_re, t_im; + + a_re = f[(u << 1) + 0]; + a_im = f[(u << 1) + 0 + hn]; + b_re = f[(u << 1) + 1]; + b_im = f[(u << 1) + 1 + hn]; + + FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); + f0[u] = fpr_half(t_re); + f0[u + qn] = fpr_half(t_im); + + FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); + FPC_MUL(t_re, t_im, t_re, t_im, + fpr_gm_tab[((u + hn) << 1) + 0], + fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); + f1[u] = fpr_half(t_re); + f1[u + qn] = fpr_half(t_im); + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_poly_merge_fft( + fpr *f, + const fpr *f0, const fpr *f1, unsigned logn) { + size_t n, hn, qn, u; + + n = (size_t)1 << logn; + hn = n >> 1; + qn = hn >> 1; + + /* + * An extra copy to handle the special case logn = 1. 
+ */ + f[0] = f0[0]; + f[hn] = f1[0]; + + for (u = 0; u < qn; u ++) { + fpr a_re, a_im, b_re, b_im; + fpr t_re, t_im; + + a_re = f0[u]; + a_im = f0[u + qn]; + FPC_MUL(b_re, b_im, f1[u], f1[u + qn], + fpr_gm_tab[((u + hn) << 1) + 0], + fpr_gm_tab[((u + hn) << 1) + 1]); + FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); + f[(u << 1) + 0] = t_re; + f[(u << 1) + 0 + hn] = t_im; + FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); + f[(u << 1) + 1] = t_re; + f[(u << 1) + 1 + hn] = t_im; + } +} diff --git a/crypto_sign/falcon/falcon-512/clean/fpr.c b/crypto_sign/falcon/falcon-512/clean/fpr.c new file mode 100644 index 00000000..091462a7 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/fpr.c @@ -0,0 +1,1634 @@ +#include "inner.h" + +/* + * Floating-point operations. + * + * This file implements the non-inline functions declared in + * fpr.h, as well as the constants for FFT / iFFT. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + + +/* + * Normalize a provided unsigned integer to the 2^63..2^64-1 range by + * left-shifting it if necessary. The exponent e is adjusted accordingly + * (i.e. if the value was left-shifted by n bits, then n is subtracted + * from e). If source m is 0, then it remains 0, but e is altered. + * Both m and e must be simple variables (no expressions allowed). + */ +#define FPR_NORM64(m, e) do { \ + uint32_t nt; \ + \ + (e) -= 63; \ + \ + nt = (uint32_t)((m) >> 32); \ + nt = (nt | -nt) >> 31; \ + (m) ^= ((m) ^ ((m) << 32)) & ((uint64_t)nt - 1); \ + (e) += (int)(nt << 5); \ + \ + nt = (uint32_t)((m) >> 48); \ + nt = (nt | -nt) >> 31; \ + (m) ^= ((m) ^ ((m) << 16)) & ((uint64_t)nt - 1); \ + (e) += (int)(nt << 4); \ + \ + nt = (uint32_t)((m) >> 56); \ + nt = (nt | -nt) >> 31; \ + (m) ^= ((m) ^ ((m) << 8)) & ((uint64_t)nt - 1); \ + (e) += (int)(nt << 3); \ + \ + nt = (uint32_t)((m) >> 60); \ + nt = (nt | -nt) >> 31; \ + (m) ^= ((m) ^ ((m) << 4)) & ((uint64_t)nt - 1); \ + (e) += (int)(nt << 2); \ + \ + nt = (uint32_t)((m) >> 62); \ + nt = (nt | -nt) >> 31; \ + (m) ^= ((m) ^ ((m) << 2)) & ((uint64_t)nt - 1); \ + (e) += (int)(nt << 1); \ + \ + nt = (uint32_t)((m) >> 63); \ + (m) ^= ((m) ^ ((m) << 1)) & ((uint64_t)nt - 1); \ + (e) += (int)(nt); \ + } while (0) + + +fpr +fpr_scaled(int64_t i, int sc) { + /* + * To convert from int to float, we have to do the following: + * 1. Get the absolute value of the input, and its sign + * 2. Shift right or left the value as appropriate + * 3. Pack the result + * + * We can assume that the source integer is not -2^63. 
+ */ + int s, e; + uint32_t t; + uint64_t m; + + /* + * Extract sign bit. + * We have: -i = 1 + ~i + */ + s = (int)((uint64_t)i >> 63); + i ^= -(int64_t)s; + i += s; + + /* + * For now we suppose that i != 0. + * Otherwise, we set m to i and left-shift it as much as needed + * to get a 1 in the top bit. We can do that in a logarithmic + * number of conditional shifts. + */ + m = (uint64_t)i; + e = 9 + sc; + FPR_NORM64(m, e); + + /* + * Now m is in the 2^63..2^64-1 range. We must divide it by 512; + * if one of the dropped bits is a 1, this should go into the + * "sticky bit". + */ + m |= ((uint32_t)m & 0x1FF) + 0x1FF; + m >>= 9; + + /* + * Corrective action: if i = 0 then all of the above was + * incorrect, and we clamp e and m down to zero. + */ + t = (uint32_t)((uint64_t)(i | -i) >> 63); + m &= -(uint64_t)t; + e &= -(int)t; + + /* + * Assemble back everything. The FPR() function will handle cases + * where e is too low. + */ + return FPR(s, e, m); +} + + + +fpr +fpr_add(fpr x, fpr y) { + uint64_t m, xu, yu, za; + uint32_t cs; + int ex, ey, sx, sy, cc; + + /* + * Make sure that the first operand (x) has the larger absolute + * value. This guarantees that the exponent of y is less than + * or equal to the exponent of x, and, if they are equal, then + * the mantissa of y will not be greater than the mantissa of x. + * + * After this swap, the result will have the sign of x, except in + * the following edge case: abs(x) = abs(y), and x and y have + * opposite sign bits; in that case, the result shall be +0 + * even if the sign bit of x is 1. To handle this case properly, + * we also do the swap if abs(x) = abs(y) AND the sign of x is 1. + */ + m = ((uint64_t)1 << 63) - 1; + za = (x & m) - (y & m); + cs = (uint32_t)(za >> 63) + | ((1U - (uint32_t)(-za >> 63)) & (uint32_t)(x >> 63)); + m = (x ^ y) & -(uint64_t)cs; + x ^= m; + y ^= m; + + /* + * Extract sign bits, exponents and mantissas. The mantissas are + * scaled up to 2^55..2^56-1, and the exponent is unbiased.
If + * an operand is zero, its mantissa is set to 0 at this step, and + * its exponent will be -1078. + */ + ex = (int)(x >> 52); + sx = ex >> 11; + ex &= 0x7FF; + m = (uint64_t)(uint32_t)((ex + 0x7FF) >> 11) << 52; + xu = ((x & (((uint64_t)1 << 52) - 1)) | m) << 3; + ex -= 1078; + ey = (int)(y >> 52); + sy = ey >> 11; + ey &= 0x7FF; + m = (uint64_t)(uint32_t)((ey + 0x7FF) >> 11) << 52; + yu = ((y & (((uint64_t)1 << 52) - 1)) | m) << 3; + ey -= 1078; + + /* + * x has the larger exponent; hence, we only need to right-shift y. + * If the shift count is larger than 59 bits then we clamp the + * value to zero. + */ + cc = ex - ey; + yu &= -(uint64_t)((uint32_t)(cc - 60) >> 31); + cc &= 63; + + /* + * The lowest bit of yu is "sticky". + */ + m = fpr_ulsh(1, cc) - 1; + yu |= (yu & m) + m; + yu = fpr_ursh(yu, cc); + + /* + * If the operands have the same sign, then we add the mantissas; + * otherwise, we subtract the mantissas. + */ + xu += yu - ((yu << 1) & -(uint64_t)(sx ^ sy)); + + /* + * The result may be smaller, or slightly larger. We normalize + * it to the 2^63..2^64-1 range (if xu is zero, then it stays + * at zero). + */ + FPR_NORM64(xu, ex); + + /* + * Scale down the value to 2^54..2^55-1, handling the last bit + * as sticky. + */ + xu |= ((uint32_t)xu & 0x1FF) + 0x1FF; + xu >>= 9; + ex += 9; + + /* + * In general, the result has the sign of x. However, if the + * result is exactly zero, then the following situations may + * be encountered: + * x > 0, y = -x -> result should be +0 + * x < 0, y = -x -> result should be +0 + * x = +0, y = +0 -> result should be +0 + * x = -0, y = +0 -> result should be +0 + * x = +0, y = -0 -> result should be +0 + * x = -0, y = -0 -> result should be -0 + * + * But at the conditional swap step at the start of the + * function, we ensured that if abs(x) = abs(y) and the + * sign of x was 1, then x and y were swapped.
Thus, the + * two following cases cannot actually happen: + * x < 0, y = -x + * x = -0, y = +0 + * In all other cases, the sign bit of x is conserved, which + * is what the FPR() function does. The FPR() function also + * properly clamps values to zero when the exponent is too + * low, but does not alter the sign in that case. + */ + return FPR(sx, ex, xu); +} + + + +fpr +fpr_mul(fpr x, fpr y) { + uint64_t xu, yu, w, zu, zv; + uint32_t x0, x1, y0, y1, z0, z1, z2; + int ex, ey, d, e, s; + + /* + * Extract absolute values as scaled unsigned integers. We + * don't extract exponents yet. + */ + xu = (x & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52); + yu = (y & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52); + + /* + * We have two 53-bit integers to multiply; we need to split + * each into a lower half and a upper half. Moreover, we + * prefer to have lower halves to be of 25 bits each, for + * reasons explained later on. + */ + x0 = (uint32_t)xu & 0x01FFFFFF; + x1 = (uint32_t)(xu >> 25); + y0 = (uint32_t)yu & 0x01FFFFFF; + y1 = (uint32_t)(yu >> 25); + w = (uint64_t)x0 * (uint64_t)y0; + z0 = (uint32_t)w & 0x01FFFFFF; + z1 = (uint32_t)(w >> 25); + w = (uint64_t)x0 * (uint64_t)y1; + z1 += (uint32_t)w & 0x01FFFFFF; + z2 = (uint32_t)(w >> 25); + w = (uint64_t)x1 * (uint64_t)y0; + z1 += (uint32_t)w & 0x01FFFFFF; + z2 += (uint32_t)(w >> 25); + zu = (uint64_t)x1 * (uint64_t)y1; + z2 += (z1 >> 25); + z1 &= 0x01FFFFFF; + zu += z2; + + /* + * Since xu and yu are both in the 2^52..2^53-1 range, the + * product is in the 2^104..2^106-1 range. We first reassemble + * it and round it into the 2^54..2^56-1 range; the bottom bit + * is made "sticky". Since the low limbs z0 and z1 are 25 bits + * each, we just take the upper part (zu), and consider z0 and + * z1 only for purposes of stickiness. + * (This is the reason why we chose 25-bit limbs above.) 
+ */ + zu |= ((z0 | z1) + 0x01FFFFFF) >> 25; + + /* + * We normalize zu to the 2^54..2^55-1 range: it could be one + * bit too large at this point. This is done with a conditional + * right-shift that takes into account the sticky bit. + */ + zv = (zu >> 1) | (zu & 1); + w = zu >> 55; + zu ^= (zu ^ zv) & -w; + + /* + * Get the aggregate scaling factor: + * + * - Each exponent is biased by 1023. + * + * - Integral mantissas are scaled by 2^52, hence an + * extra 52 bias for each exponent. + * + * - However, we right-shifted z by 50 bits, and then + * by 0 or 1 extra bit (depending on the value of w). + * + * In total, we must add the exponents, then subtract + * 2 * (1023 + 52), then add 50 + w. + */ + ex = (int)((x >> 52) & 0x7FF); + ey = (int)((y >> 52) & 0x7FF); + e = ex + ey - 2100 + (int)w; + + /* + * Sign bit is the XOR of the operand sign bits. + */ + s = (int)((x ^ y) >> 63); + + /* + * Corrective actions for zeros: if either of the operands is + * zero, then the computations above were wrong. Test for zero + * is whether ex or ey is zero. We just have to set the mantissa + * (zu) to zero, the FPR() function will normalize e. + */ + d = ((ex + 0x7FF) & (ey + 0x7FF)) >> 11; + zu &= -(uint64_t)d; + + /* + * FPR() packs the result and applies proper rounding. + */ + return FPR(s, e, zu); +} + + + +fpr +fpr_div(fpr x, fpr y) { + uint64_t xu, yu, q, q2, w; + int i, ex, ey, e, d, s; + + /* + * Extract mantissas of x and y (unsigned). + */ + xu = (x & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52); + yu = (y & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52); + + /* + * Perform bit-by-bit division of xu by yu. We run it for 55 bits. + */ + q = 0; + for (i = 0; i < 55; i ++) { + /* + * If yu is less than or equal to xu, then subtract it and + * push a 1 in the quotient; otherwise, leave xu unchanged + * and push a 0.
+ */ + uint64_t b; + + b = ((xu - yu) >> 63) - 1; + xu -= b & yu; + q |= b & 1; + xu <<= 1; + q <<= 1; + } + + /* + * We got 55 bits in the quotient, followed by an extra zero. We + * want that 56th bit to be "sticky": it should be a 1 if and + * only if the remainder (xu) is non-zero. + */ + q |= (xu | -xu) >> 63; + + /* + * Quotient is at most 2^56-1. Its top bit may be zero, but in + * that case the next-to-top bit will be a one, since the + * initial xu and yu were both in the 2^52..2^53-1 range. + * We perform a conditional shift to normalize q to the + * 2^54..2^55-1 range (with the bottom bit being sticky). + */ + q2 = (q >> 1) | (q & 1); + w = q >> 55; + q ^= (q ^ q2) & -w; + + /* + * Extract exponents to compute the scaling factor: + * + * - Each exponent is biased and we scaled them up by + * 52 bits; but these biases will cancel out. + * + * - The division loop produced a 55-bit shifted result, + * so we must scale it down by 55 bits. + * + * - If w = 1, we right-shifted the integer by 1 bit, + * hence we must add 1 to the scaling. + */ + ex = (int)((x >> 52) & 0x7FF); + ey = (int)((y >> 52) & 0x7FF); + e = ex - ey - 55 + (int)w; + + /* + * Sign is the XOR of the signs of the operands. + */ + s = (int)((x ^ y) >> 63); + + /* + * Corrective actions for zeros: if x = 0, then the computation + * is wrong, and we must clamp s, e and q to 0. We do not care + * about the case y = 0 (as per assumptions in this module, + * the caller does not perform divisions by zero). + */ + d = (ex + 0x7FF) >> 11; + s &= d; + e &= -d; + q &= -(uint64_t)d; + + /* + * FPR() packs the result and applies proper rounding. + */ + return FPR(s, e, q); +} + + + +fpr +fpr_sqrt(fpr x) { + uint64_t xu, q, s, r; + int ex, e; + + /* + * Extract the mantissa and the exponent. We don't care about + * the sign: by assumption, the operand is nonnegative. + * We want the "true" exponent corresponding to a mantissa + * in the 1..2 range.
+ */ + xu = (x & (((uint64_t)1 << 52) - 1)) | ((uint64_t)1 << 52); + ex = (int)((x >> 52) & 0x7FF); + e = ex - 1023; + + /* + * If the exponent is odd, double the mantissa and decrement + * the exponent. The exponent is then halved to account for + * the square root. + */ + xu += xu & -(uint64_t)(e & 1); + e >>= 1; + + /* + * Double the mantissa. + */ + xu <<= 1; + + /* + * We now have a mantissa in the 2^53..2^55-1 range. It + * represents a value between 1 (inclusive) and 4 (exclusive) + * in fixed point notation (with 53 fractional bits). We + * compute the square root bit by bit. + */ + q = 0; + s = 0; + r = (uint64_t)1 << 53; + for (int i = 0; i < 54; i ++) { + uint64_t t, b; + + t = s + r; + b = ((xu - t) >> 63) - 1; + s += (r << 1) & b; + xu -= t & b; + q += r & b; + xu <<= 1; + r >>= 1; + } + + /* + * Now, q is a rounded-low 54-bit value, with a leading 1, + * 52 fractional digits, and an additional guard bit. We add + * an extra sticky bit to account for what remains of the operand. + */ + q <<= 1; + q |= (xu | -xu) >> 63; + + /* + * Result q is in the 2^54..2^55-1 range; we bias the exponent + * by 54 bits (the value e at that point contains the "true" + * exponent, but q is now considered an integer, i.e. scaled + * up). + */ + e -= 54; + + /* + * Corrective action for an operand of value zero. + */ + q &= -(uint64_t)((ex + 0x7FF) >> 11); + + /* + * Apply rounding and pack the result. + */ + return FPR(0, e, q); +} + + +uint64_t +fpr_expm_p63(fpr x, fpr ccs) { + /* + * Polynomial approximation of exp(-x) is taken from FACCT: + * https://eprint.iacr.org/2018/1234 + * Specifically, values are extracted from the implementation + * referenced from the FACCT article, and available at: + * https://github.com/raykzhao/gaussian + * Here, the coefficients have been scaled up by 2^63 and + * converted to integers.
+ * + * Tests over more than 24 billions of random inputs in the + * 0..log(2) range have never shown a deviation larger than + * 2^(-50) from the true mathematical value. + */ + static const uint64_t C[] = { + 0x00000004741183A3u, + 0x00000036548CFC06u, + 0x0000024FDCBF140Au, + 0x0000171D939DE045u, + 0x0000D00CF58F6F84u, + 0x000680681CF796E3u, + 0x002D82D8305B0FEAu, + 0x011111110E066FD0u, + 0x0555555555070F00u, + 0x155555555581FF00u, + 0x400000000002B400u, + 0x7FFFFFFFFFFF4800u, + 0x8000000000000000u + }; + + uint64_t z, y; + size_t u; + uint32_t z0, z1, y0, y1; + uint64_t a, b; + + y = C[0]; + z = (uint64_t)fpr_trunc(fpr_mul(x, fpr_ptwo63)) << 1; + for (u = 1; u < (sizeof C) / sizeof(C[0]); u ++) { + /* + * Compute product z * y over 128 bits, but keep only + * the top 64 bits. + * + * TODO: On some architectures/compilers we could use + * some intrinsics (__umulh() on MSVC) or other compiler + * extensions (unsigned __int128 on GCC / Clang) for + * improved speed; however, most 64-bit architectures + * also have appropriate IEEE754 floating-point support, + * which is better. + */ + uint64_t c; + + z0 = (uint32_t)z; + z1 = (uint32_t)(z >> 32); + y0 = (uint32_t)y; + y1 = (uint32_t)(y >> 32); + a = ((uint64_t)z0 * (uint64_t)y1) + + (((uint64_t)z0 * (uint64_t)y0) >> 32); + b = ((uint64_t)z1 * (uint64_t)y0); + c = (a >> 32) + (b >> 32); + c += (((uint64_t)(uint32_t)a + (uint64_t)(uint32_t)b) >> 32); + c += (uint64_t)z1 * (uint64_t)y1; + y = C[u] - c; + } + + /* + * The scaling factor must be applied at the end. Since y is now + * in fixed-point notation, we have to convert the factor to the + * same format, and do an extra integer multiplication. 
+ */ + z = (uint64_t)fpr_trunc(fpr_mul(ccs, fpr_ptwo63)) << 1; + z0 = (uint32_t)z; + z1 = (uint32_t)(z >> 32); + y0 = (uint32_t)y; + y1 = (uint32_t)(y >> 32); + a = ((uint64_t)z0 * (uint64_t)y1) + + (((uint64_t)z0 * (uint64_t)y0) >> 32); + b = ((uint64_t)z1 * (uint64_t)y0); + y = (a >> 32) + (b >> 32); + y += (((uint64_t)(uint32_t)a + (uint64_t)(uint32_t)b) >> 32); + y += (uint64_t)z1 * (uint64_t)y1; + + return y; +} + +const fpr fpr_gm_tab[] = { + 0, 0, + 9223372036854775808U, 4607182418800017408U, + 4604544271217802189U, 4604544271217802189U, + 13827916308072577997U, 4604544271217802189U, + 4606496786581982534U, 4600565431771507043U, + 13823937468626282851U, 4606496786581982534U, + 4600565431771507043U, 4606496786581982534U, + 13829868823436758342U, 4600565431771507043U, + 4607009347991985328U, 4596196889902818827U, + 13819568926757594635U, 4607009347991985328U, + 4603179351334086856U, 4605664432017547683U, + 13829036468872323491U, 4603179351334086856U, + 4605664432017547683U, 4603179351334086856U, + 13826551388188862664U, 4605664432017547683U, + 4596196889902818827U, 4607009347991985328U, + 13830381384846761136U, 4596196889902818827U, + 4607139046673687846U, 4591727299969791020U, + 13815099336824566828U, 4607139046673687846U, + 4603889326261607894U, 4605137878724712257U, + 13828509915579488065U, 4603889326261607894U, + 4606118860100255153U, 4602163548591158843U, + 13825535585445934651U, 4606118860100255153U, + 4598900923775164166U, 4606794571824115162U, + 13830166608678890970U, 4598900923775164166U, + 4606794571824115162U, 4598900923775164166U, + 13822272960629939974U, 4606794571824115162U, + 4602163548591158843U, 4606118860100255153U, + 13829490896955030961U, 4602163548591158843U, + 4605137878724712257U, 4603889326261607894U, + 13827261363116383702U, 4605137878724712257U, + 4591727299969791020U, 4607139046673687846U, + 13830511083528463654U, 4591727299969791020U, + 4607171569234046334U, 4587232218149935124U, + 13810604255004710932U, 4607171569234046334U, + 
4604224084862889120U, 4604849113969373103U, + 13828221150824148911U, 4604224084862889120U, + 4606317631232591731U, 4601373767755717824U, + 13824745804610493632U, 4606317631232591731U, + 4599740487990714333U, 4606655894547498725U, + 13830027931402274533U, 4599740487990714333U, + 4606912484326125783U, 4597922303871901467U, + 13821294340726677275U, 4606912484326125783U, + 4602805845399633902U, 4605900952042040894U, + 13829272988896816702U, 4602805845399633902U, + 4605409869824231233U, 4603540801876750389U, + 13826912838731526197U, 4605409869824231233U, + 4594454542771183930U, 4607084929468638487U, + 13830456966323414295U, 4594454542771183930U, + 4607084929468638487U, 4594454542771183930U, + 13817826579625959738U, 4607084929468638487U, + 4603540801876750389U, 4605409869824231233U, + 13828781906679007041U, 4603540801876750389U, + 4605900952042040894U, 4602805845399633902U, + 13826177882254409710U, 4605900952042040894U, + 4597922303871901467U, 4606912484326125783U, + 13830284521180901591U, 4597922303871901467U, + 4606655894547498725U, 4599740487990714333U, + 13823112524845490141U, 4606655894547498725U, + 4601373767755717824U, 4606317631232591731U, + 13829689668087367539U, 4601373767755717824U, + 4604849113969373103U, 4604224084862889120U, + 13827596121717664928U, 4604849113969373103U, + 4587232218149935124U, 4607171569234046334U, + 13830543606088822142U, 4587232218149935124U, + 4607179706000002317U, 4582730748936808062U, + 13806102785791583870U, 4607179706000002317U, + 4604386048625945823U, 4604698657331085206U, + 13828070694185861014U, 4604386048625945823U, + 4606409688975526202U, 4600971798440897930U, + 13824343835295673738U, 4606409688975526202U, + 4600154912527631775U, 4606578871587619388U, + 13829950908442395196U, 4600154912527631775U, + 4606963563043808649U, 4597061974398750563U, + 13820434011253526371U, 4606963563043808649U, + 4602994049708411683U, 4605784983948558848U, + 13829157020803334656U, 4602994049708411683U, + 4605539368864982914U, 4603361638657888991U, + 
13826733675512664799U, 4605539368864982914U, + 4595327571478659014U, 4607049811591515049U, + 13830421848446290857U, 4595327571478659014U, + 4607114680469659603U, 4593485039402578702U, + 13816857076257354510U, 4607114680469659603U, + 4603716733069447353U, 4605276012900672507U, + 13828648049755448315U, 4603716733069447353U, + 4606012266443150634U, 4602550884377336506U, + 13825922921232112314U, 4606012266443150634U, + 4598476289818621559U, 4606856142606846307U, + 13830228179461622115U, 4598476289818621559U, + 4606727809065869586U, 4599322407794599425U, + 13822694444649375233U, 4606727809065869586U, + 4601771097584682078U, 4606220668805321205U, + 13829592705660097013U, 4601771097584682078U, + 4604995550503212910U, 4604058477489546729U, + 13827430514344322537U, 4604995550503212910U, + 4589965306122607094U, 4607158013403433018U, + 13830530050258208826U, 4589965306122607094U, + 4607158013403433018U, 4589965306122607094U, + 13813337342977382902U, 4607158013403433018U, + 4604058477489546729U, 4604995550503212910U, + 13828367587357988718U, 4604058477489546729U, + 4606220668805321205U, 4601771097584682078U, + 13825143134439457886U, 4606220668805321205U, + 4599322407794599425U, 4606727809065869586U, + 13830099845920645394U, 4599322407794599425U, + 4606856142606846307U, 4598476289818621559U, + 13821848326673397367U, 4606856142606846307U, + 4602550884377336506U, 4606012266443150634U, + 13829384303297926442U, 4602550884377336506U, + 4605276012900672507U, 4603716733069447353U, + 13827088769924223161U, 4605276012900672507U, + 4593485039402578702U, 4607114680469659603U, + 13830486717324435411U, 4593485039402578702U, + 4607049811591515049U, 4595327571478659014U, + 13818699608333434822U, 4607049811591515049U, + 4603361638657888991U, 4605539368864982914U, + 13828911405719758722U, 4603361638657888991U, + 4605784983948558848U, 4602994049708411683U, + 13826366086563187491U, 4605784983948558848U, + 4597061974398750563U, 4606963563043808649U, + 13830335599898584457U, 4597061974398750563U, + 
4606578871587619388U, 4600154912527631775U, + 13823526949382407583U, 4606578871587619388U, + 4600971798440897930U, 4606409688975526202U, + 13829781725830302010U, 4600971798440897930U, + 4604698657331085206U, 4604386048625945823U, + 13827758085480721631U, 4604698657331085206U, + 4582730748936808062U, 4607179706000002317U, + 13830551742854778125U, 4582730748936808062U, + 4607181740574479067U, 4578227681973159812U, + 13801599718827935620U, 4607181740574479067U, + 4604465633578481725U, 4604621949701367983U, + 13827993986556143791U, 4604465633578481725U, + 4606453861145241227U, 4600769149537129431U, + 13824141186391905239U, 4606453861145241227U, + 4600360675823176935U, 4606538458821337243U, + 13829910495676113051U, 4600360675823176935U, + 4606987119037722413U, 4596629994023683153U, + 13820002030878458961U, 4606987119037722413U, + 4603087070374583113U, 4605725276488455441U, + 13829097313343231249U, 4603087070374583113U, + 4605602459698789090U, 4603270878689749849U, + 13826642915544525657U, 4605602459698789090U, + 4595762727260045105U, 4607030246558998647U, + 13830402283413774455U, 4595762727260045105U, + 4607127537664763515U, 4592606767730311893U, + 13815978804585087701U, 4607127537664763515U, + 4603803453461190356U, 4605207475328619533U, + 13828579512183395341U, 4603803453461190356U, + 4606066157444814153U, 4602357870542944470U, + 13825729907397720278U, 4606066157444814153U, + 4598688984595225406U, 4606826008603986804U, + 13830198045458762612U, 4598688984595225406U, + 4606761837001494797U, 4599112075441176914U, + 13822484112295952722U, 4606761837001494797U, + 4601967947786150793U, 4606170366472647579U, + 13829542403327423387U, 4601967947786150793U, + 4605067233569943231U, 4603974338538572089U, + 13827346375393347897U, 4605067233569943231U, + 4590846768565625881U, 4607149205763218185U, + 13830521242617993993U, 4590846768565625881U, + 4607165468267934125U, 4588998070480937184U, + 13812370107335712992U, 4607165468267934125U, + 4604141730443515286U, 4604922840319727473U, + 
13828294877174503281U, 4604141730443515286U, + 4606269759522929756U, 4601573027631668967U, + 13824945064486444775U, 4606269759522929756U, + 4599531889160152938U, 4606692493141721470U, + 13830064529996497278U, 4599531889160152938U, + 4606884969294623682U, 4598262871476403630U, + 13821634908331179438U, 4606884969294623682U, + 4602710690099904183U, 4605957195211051218U, + 13829329232065827026U, 4602710690099904183U, + 4605343481119364930U, 4603629178146150899U, + 13827001215000926707U, 4605343481119364930U, + 4594016801320007031U, 4607100477024622401U, + 13830472513879398209U, 4594016801320007031U, + 4607068040143112603U, 4594891488091520602U, + 13818263524946296410U, 4607068040143112603U, + 4603451617570386922U, 4605475169017376660U, + 13828847205872152468U, 4603451617570386922U, + 4605843545406134034U, 4602900303344142735U, + 13826272340198918543U, 4605843545406134034U, + 4597492765973365521U, 4606938683557690074U, + 13830310720412465882U, 4597492765973365521U, + 4606618018794815019U, 4599948172872067014U, + 13823320209726842822U, 4606618018794815019U, + 4601173347964633034U, 4606364276725003740U, + 13829736313579779548U, 4601173347964633034U, + 4604774382555066977U, 4604305528345395596U, + 13827677565200171404U, 4604774382555066977U, + 4585465300892538317U, 4607176315382986589U, + 13830548352237762397U, 4585465300892538317U, + 4607176315382986589U, 4585465300892538317U, + 13808837337747314125U, 4607176315382986589U, + 4604305528345395596U, 4604774382555066977U, + 13828146419409842785U, 4604305528345395596U, + 4606364276725003740U, 4601173347964633034U, + 13824545384819408842U, 4606364276725003740U, + 4599948172872067014U, 4606618018794815019U, + 13829990055649590827U, 4599948172872067014U, + 4606938683557690074U, 4597492765973365521U, + 13820864802828141329U, 4606938683557690074U, + 4602900303344142735U, 4605843545406134034U, + 13829215582260909842U, 4602900303344142735U, + 4605475169017376660U, 4603451617570386922U, + 13826823654425162730U, 4605475169017376660U, + 
4594891488091520602U, 4607068040143112603U, + 13830440076997888411U, 4594891488091520602U, + 4607100477024622401U, 4594016801320007031U, + 13817388838174782839U, 4607100477024622401U, + 4603629178146150899U, 4605343481119364930U, + 13828715517974140738U, 4603629178146150899U, + 4605957195211051218U, 4602710690099904183U, + 13826082726954679991U, 4605957195211051218U, + 4598262871476403630U, 4606884969294623682U, + 13830257006149399490U, 4598262871476403630U, + 4606692493141721470U, 4599531889160152938U, + 13822903926014928746U, 4606692493141721470U, + 4601573027631668967U, 4606269759522929756U, + 13829641796377705564U, 4601573027631668967U, + 4604922840319727473U, 4604141730443515286U, + 13827513767298291094U, 4604922840319727473U, + 4588998070480937184U, 4607165468267934125U, + 13830537505122709933U, 4588998070480937184U, + 4607149205763218185U, 4590846768565625881U, + 13814218805420401689U, 4607149205763218185U, + 4603974338538572089U, 4605067233569943231U, + 13828439270424719039U, 4603974338538572089U, + 4606170366472647579U, 4601967947786150793U, + 13825339984640926601U, 4606170366472647579U, + 4599112075441176914U, 4606761837001494797U, + 13830133873856270605U, 4599112075441176914U, + 4606826008603986804U, 4598688984595225406U, + 13822061021450001214U, 4606826008603986804U, + 4602357870542944470U, 4606066157444814153U, + 13829438194299589961U, 4602357870542944470U, + 4605207475328619533U, 4603803453461190356U, + 13827175490315966164U, 4605207475328619533U, + 4592606767730311893U, 4607127537664763515U, + 13830499574519539323U, 4592606767730311893U, + 4607030246558998647U, 4595762727260045105U, + 13819134764114820913U, 4607030246558998647U, + 4603270878689749849U, 4605602459698789090U, + 13828974496553564898U, 4603270878689749849U, + 4605725276488455441U, 4603087070374583113U, + 13826459107229358921U, 4605725276488455441U, + 4596629994023683153U, 4606987119037722413U, + 13830359155892498221U, 4596629994023683153U, + 4606538458821337243U, 4600360675823176935U, + 
13823732712677952743U, 4606538458821337243U, + 4600769149537129431U, 4606453861145241227U, + 13829825898000017035U, 4600769149537129431U, + 4604621949701367983U, 4604465633578481725U, + 13827837670433257533U, 4604621949701367983U, + 4578227681973159812U, 4607181740574479067U, + 13830553777429254875U, 4578227681973159812U, + 4607182249242036882U, 4573724215515480177U, + 13797096252370255985U, 4607182249242036882U, + 4604505071555817232U, 4604583231088591477U, + 13827955267943367285U, 4604505071555817232U, + 4606475480113671417U, 4600667422348321968U, + 13824039459203097776U, 4606475480113671417U, + 4600463181646572228U, 4606517779747998088U, + 13829889816602773896U, 4600463181646572228U, + 4606998399608725124U, 4596413578358834022U, + 13819785615213609830U, 4606998399608725124U, + 4603133304188877240U, 4605694995810664660U, + 13829067032665440468U, 4603133304188877240U, + 4605633586259814045U, 4603225210076562971U, + 13826597246931338779U, 4605633586259814045U, + 4595979936813835462U, 4607019963775302583U, + 13830392000630078391U, 4595979936813835462U, + 4607133460805585796U, 4592167175087283203U, + 13815539211942059011U, 4607133460805585796U, + 4603846496621587377U, 4605172808754305228U, + 13828544845609081036U, 4603846496621587377U, + 4606092657816072624U, 4602260871257280788U, + 13825632908112056596U, 4606092657816072624U, + 4598795050632330097U, 4606810452769876110U, + 13830182489624651918U, 4598795050632330097U, + 4606778366364612594U, 4599006600037663623U, + 13822378636892439431U, 4606778366364612594U, + 4602065906208722008U, 4606144763310860551U, + 13829516800165636359U, 4602065906208722008U, + 4605102686554936490U, 4603931940768740167U, + 13827303977623515975U, 4605102686554936490U, + 4591287158938884897U, 4607144295058764886U, + 13830516331913540694U, 4591287158938884897U, + 4607168688050493276U, 4588115294056142819U, + 13811487330910918627U, 4607168688050493276U, + 4604183020748362039U, 4604886103475043762U, + 13828258140329819570U, 4604183020748362039U, + 
4606293848208650998U, 4601473544562720001U, + 13824845581417495809U, 4606293848208650998U, + 4599636300858866724U, 4606674353838411301U, + 13830046390693187109U, 4599636300858866724U, + 4606898891031025132U, 4598136582470364665U, + 13821508619325140473U, 4606898891031025132U, + 4602758354025980442U, 4605929219593405673U, + 13829301256448181481U, 4602758354025980442U, + 4605376811039722786U, 4603585091850767959U, + 13826957128705543767U, 4605376811039722786U, + 4594235767444503503U, 4607092871118901179U, + 13830464907973676987U, 4594235767444503503U, + 4607076652372832968U, 4594673119063280916U, + 13818045155918056724U, 4607076652372832968U, + 4603496309891590679U, 4605442656228245717U, + 13828814693083021525U, 4603496309891590679U, + 4605872393621214213U, 4602853162432841185U, + 13826225199287616993U, 4605872393621214213U, + 4597707695679609371U, 4606925748668145757U, + 13830297785522921565U, 4597707695679609371U, + 4606637115963965612U, 4599844446633109139U, + 13823216483487884947U, 4606637115963965612U, + 4601273700967202825U, 4606341107699334546U, + 13829713144554110354U, 4601273700967202825U, + 4604811873195349477U, 4604264921241055824U, + 13827636958095831632U, 4604811873195349477U, + 4586348876009622851U, 4607174111710118367U, + 13830546148564894175U, 4586348876009622851U, + 4607178180169683960U, 4584498631466405633U, + 13807870668321181441U, 4607178180169683960U, + 4604345904647073908U, 4604736643460027021U, + 13828108680314802829U, 4604345904647073908U, + 4606387137437298591U, 4601072712526242277U, + 13824444749381018085U, 4606387137437298591U, + 4600051662802353687U, 4606598603759044570U, + 13829970640613820378U, 4600051662802353687U, + 4606951288507767453U, 4597277522845151878U, + 13820649559699927686U, 4606951288507767453U, + 4602947266358709886U, 4605814408482919348U, + 13829186445337695156U, 4602947266358709886U, + 4605507406967535927U, 4603406726595779752U, + 13826778763450555560U, 4605507406967535927U, + 4595109641634432498U, 4607059093103722971U, + 
13830431129958498779U, 4595109641634432498U, + 4607107746899444102U, 4593797652641645341U, + 13817169689496421149U, 4607107746899444102U, + 4603673059103075106U, 4605309881318010327U, + 13828681918172786135U, 4603673059103075106U, + 4605984877841711338U, 4602646891659203088U, + 13826018928513978896U, 4605984877841711338U, + 4598369669086960528U, 4606870719641066940U, + 13830242756495842748U, 4598369669086960528U, + 4606710311774494716U, 4599427256825614420U, + 13822799293680390228U, 4606710311774494716U, + 4601672213217083403U, 4606245366082353408U, + 13829617402937129216U, 4601672213217083403U, + 4604959323120302796U, 4604100215502905499U, + 13827472252357681307U, 4604959323120302796U, + 4589524267239410099U, 4607161910007591876U, + 13830533946862367684U, 4589524267239410099U, + 4607153778602162496U, 4590406145430462614U, + 13813778182285238422U, 4607153778602162496U, + 4604016517974851588U, 4605031521104517324U, + 13828403557959293132U, 4604016517974851588U, + 4606195668621671667U, 4601869677011524443U, + 13825241713866300251U, 4606195668621671667U, + 4599217346014614711U, 4606744984357082948U, + 13830117021211858756U, 4599217346014614711U, + 4606841238740778884U, 4598582729657176439U, + 13821954766511952247U, 4606841238740778884U, + 4602454542796181607U, 4606039359984203741U, + 13829411396838979549U, 4602454542796181607U, + 4605241877142478242U, 4603760198400967492U, + 13827132235255743300U, 4605241877142478242U, + 4593046061348462537U, 4607121277474223905U, + 13830493314328999713U, 4593046061348462537U, + 4607040195955932526U, 4595545269419264690U, + 13818917306274040498U, 4607040195955932526U, + 4603316355454250015U, 4605571053506370248U, + 13828943090361146056U, 4603316355454250015U, + 4605755272910869620U, 4603040651631881451U, + 13826412688486657259U, 4605755272910869620U, + 4596846128749438754U, 4606975506703684317U, + 13830347543558460125U, 4596846128749438754U, + 4606558823023444576U, 4600257918160607478U, + 13823629955015383286U, 4606558823023444576U, + 
4600870609507958271U, 4606431930490633905U, + 13829803967345409713U, 4600870609507958271U, + 4604660425598397818U, 4604425958770613225U, + 13827797995625389033U, 4604660425598397818U, + 4580962600092897021U, 4607180892816495009U, + 13830552929671270817U, 4580962600092897021U, + 4607180892816495009U, 4580962600092897021U, + 13804334636947672829U, 4607180892816495009U, + 4604425958770613225U, 4604660425598397818U, + 13828032462453173626U, 4604425958770613225U, + 4606431930490633905U, 4600870609507958271U, + 13824242646362734079U, 4606431930490633905U, + 4600257918160607478U, 4606558823023444576U, + 13829930859878220384U, 4600257918160607478U, + 4606975506703684317U, 4596846128749438754U, + 13820218165604214562U, 4606975506703684317U, + 4603040651631881451U, 4605755272910869620U, + 13829127309765645428U, 4603040651631881451U, + 4605571053506370248U, 4603316355454250015U, + 13826688392309025823U, 4605571053506370248U, + 4595545269419264690U, 4607040195955932526U, + 13830412232810708334U, 4595545269419264690U, + 4607121277474223905U, 4593046061348462537U, + 13816418098203238345U, 4607121277474223905U, + 4603760198400967492U, 4605241877142478242U, + 13828613913997254050U, 4603760198400967492U, + 4606039359984203741U, 4602454542796181607U, + 13825826579650957415U, 4606039359984203741U, + 4598582729657176439U, 4606841238740778884U, + 13830213275595554692U, 4598582729657176439U, + 4606744984357082948U, 4599217346014614711U, + 13822589382869390519U, 4606744984357082948U, + 4601869677011524443U, 4606195668621671667U, + 13829567705476447475U, 4601869677011524443U, + 4605031521104517324U, 4604016517974851588U, + 13827388554829627396U, 4605031521104517324U, + 4590406145430462614U, 4607153778602162496U, + 13830525815456938304U, 4590406145430462614U, + 4607161910007591876U, 4589524267239410099U, + 13812896304094185907U, 4607161910007591876U, + 4604100215502905499U, 4604959323120302796U, + 13828331359975078604U, 4604100215502905499U, + 4606245366082353408U, 4601672213217083403U, + 
13825044250071859211U, 4606245366082353408U, + 4599427256825614420U, 4606710311774494716U, + 13830082348629270524U, 4599427256825614420U, + 4606870719641066940U, 4598369669086960528U, + 13821741705941736336U, 4606870719641066940U, + 4602646891659203088U, 4605984877841711338U, + 13829356914696487146U, 4602646891659203088U, + 4605309881318010327U, 4603673059103075106U, + 13827045095957850914U, 4605309881318010327U, + 4593797652641645341U, 4607107746899444102U, + 13830479783754219910U, 4593797652641645341U, + 4607059093103722971U, 4595109641634432498U, + 13818481678489208306U, 4607059093103722971U, + 4603406726595779752U, 4605507406967535927U, + 13828879443822311735U, 4603406726595779752U, + 4605814408482919348U, 4602947266358709886U, + 13826319303213485694U, 4605814408482919348U, + 4597277522845151878U, 4606951288507767453U, + 13830323325362543261U, 4597277522845151878U, + 4606598603759044570U, 4600051662802353687U, + 13823423699657129495U, 4606598603759044570U, + 4601072712526242277U, 4606387137437298591U, + 13829759174292074399U, 4601072712526242277U, + 4604736643460027021U, 4604345904647073908U, + 13827717941501849716U, 4604736643460027021U, + 4584498631466405633U, 4607178180169683960U, + 13830550217024459768U, 4584498631466405633U, + 4607174111710118367U, 4586348876009622851U, + 13809720912864398659U, 4607174111710118367U, + 4604264921241055824U, 4604811873195349477U, + 13828183910050125285U, 4604264921241055824U, + 4606341107699334546U, 4601273700967202825U, + 13824645737821978633U, 4606341107699334546U, + 4599844446633109139U, 4606637115963965612U, + 13830009152818741420U, 4599844446633109139U, + 4606925748668145757U, 4597707695679609371U, + 13821079732534385179U, 4606925748668145757U, + 4602853162432841185U, 4605872393621214213U, + 13829244430475990021U, 4602853162432841185U, + 4605442656228245717U, 4603496309891590679U, + 13826868346746366487U, 4605442656228245717U, + 4594673119063280916U, 4607076652372832968U, + 13830448689227608776U, 4594673119063280916U, + 
4607092871118901179U, 4594235767444503503U, + 13817607804299279311U, 4607092871118901179U, + 4603585091850767959U, 4605376811039722786U, + 13828748847894498594U, 4603585091850767959U, + 4605929219593405673U, 4602758354025980442U, + 13826130390880756250U, 4605929219593405673U, + 4598136582470364665U, 4606898891031025132U, + 13830270927885800940U, 4598136582470364665U, + 4606674353838411301U, 4599636300858866724U, + 13823008337713642532U, 4606674353838411301U, + 4601473544562720001U, 4606293848208650998U, + 13829665885063426806U, 4601473544562720001U, + 4604886103475043762U, 4604183020748362039U, + 13827555057603137847U, 4604886103475043762U, + 4588115294056142819U, 4607168688050493276U, + 13830540724905269084U, 4588115294056142819U, + 4607144295058764886U, 4591287158938884897U, + 13814659195793660705U, 4607144295058764886U, + 4603931940768740167U, 4605102686554936490U, + 13828474723409712298U, 4603931940768740167U, + 4606144763310860551U, 4602065906208722008U, + 13825437943063497816U, 4606144763310860551U, + 4599006600037663623U, 4606778366364612594U, + 13830150403219388402U, 4599006600037663623U, + 4606810452769876110U, 4598795050632330097U, + 13822167087487105905U, 4606810452769876110U, + 4602260871257280788U, 4606092657816072624U, + 13829464694670848432U, 4602260871257280788U, + 4605172808754305228U, 4603846496621587377U, + 13827218533476363185U, 4605172808754305228U, + 4592167175087283203U, 4607133460805585796U, + 13830505497660361604U, 4592167175087283203U, + 4607019963775302583U, 4595979936813835462U, + 13819351973668611270U, 4607019963775302583U, + 4603225210076562971U, 4605633586259814045U, + 13829005623114589853U, 4603225210076562971U, + 4605694995810664660U, 4603133304188877240U, + 13826505341043653048U, 4605694995810664660U, + 4596413578358834022U, 4606998399608725124U, + 13830370436463500932U, 4596413578358834022U, + 4606517779747998088U, 4600463181646572228U, + 13823835218501348036U, 4606517779747998088U, + 4600667422348321968U, 4606475480113671417U, + 
13829847516968447225U, 4600667422348321968U, + 4604583231088591477U, 4604505071555817232U, + 13827877108410593040U, 4604583231088591477U, + 4573724215515480177U, 4607182249242036882U, + 13830554286096812690U, 4573724215515480177U, + 4607182376410422530U, 4569220649180767418U, + 13792592686035543226U, 4607182376410422530U, + 4604524701268679793U, 4604563781218984604U, + 13827935818073760412U, 4604524701268679793U, + 4606486172460753999U, 4600616459743653188U, + 13823988496598428996U, 4606486172460753999U, + 4600514338912178239U, 4606507322377452870U, + 13829879359232228678U, 4600514338912178239U, + 4607003915349878877U, 4596305267720071930U, + 13819677304574847738U, 4607003915349878877U, + 4603156351203636159U, 4605679749231851918U, + 13829051786086627726U, 4603156351203636159U, + 4605649044311923410U, 4603202304363743346U, + 13826574341218519154U, 4605649044311923410U, + 4596088445927168004U, 4607014697483910382U, + 13830386734338686190U, 4596088445927168004U, + 4607136295912168606U, 4591947271803021404U, + 13815319308657797212U, 4607136295912168606U, + 4603867938232615808U, 4605155376589456981U, + 13828527413444232789U, 4603867938232615808U, + 4606105796280968177U, 4602212250118051877U, + 13825584286972827685U, 4606105796280968177U, + 4598848011564831930U, 4606802552898869248U, + 13830174589753645056U, 4598848011564831930U, + 4606786509620734768U, 4598953786765296928U, + 13822325823620072736U, 4606786509620734768U, + 4602114767134999006U, 4606131849150971908U, + 13829503886005747716U, 4602114767134999006U, + 4605120315324767624U, 4603910660507251362U, + 13827282697362027170U, 4605120315324767624U, + 4591507261658050721U, 4607141713064252300U, + 13830513749919028108U, 4591507261658050721U, + 4607170170974224083U, 4587673791460508439U, + 13811045828315284247U, 4607170170974224083U, + 4604203581176243359U, 4604867640218014515U, + 13828239677072790323U, 4604203581176243359U, + 4606305777984577632U, 4601423692641949331U, + 13824795729496725139U, 4606305777984577632U, + 
4599688422741010356U, 4606665164148251002U, + 13830037201003026810U, 4599688422741010356U, + 4606905728766014348U, 4598029484874872834U, + 13821401521729648642U, 4606905728766014348U, + 4602782121393764535U, 4605915122243179241U, + 13829287159097955049U, 4602782121393764535U, + 4605393374401988274U, 4603562972219549215U, + 13826935009074325023U, 4605393374401988274U, + 4594345179472540681U, 4607088942243446236U, + 13830460979098222044U, 4594345179472540681U, + 4607080832832247697U, 4594563856311064231U, + 13817935893165840039U, 4607080832832247697U, + 4603518581031047189U, 4605426297151190466U, + 13828798334005966274U, 4603518581031047189U, + 4605886709123365959U, 4602829525820289164U, + 13826201562675064972U, 4605886709123365959U, + 4597815040470278984U, 4606919157647773535U, + 13830291194502549343U, 4597815040470278984U, + 4606646545123403481U, 4599792496117920694U, + 13823164532972696502U, 4606646545123403481U, + 4601323770373937522U, 4606329407841126011U, + 13829701444695901819U, 4601323770373937522U, + 4604830524903495634U, 4604244531615310815U, + 13827616568470086623U, 4604830524903495634U, + 4586790578280679046U, 4607172882816799076U, + 13830544919671574884U, 4586790578280679046U, + 4607178985458280057U, 4583614727651146525U, + 13806986764505922333U, 4607178985458280057U, + 4604366005771528720U, 4604717681185626434U, + 13828089718040402242U, 4604366005771528720U, + 4606398451906509788U, 4601022290077223616U, + 13824394326931999424U, 4606398451906509788U, + 4600103317933788342U, 4606588777269136769U, + 13829960814123912577U, 4600103317933788342U, + 4606957467106717424U, 4597169786279785693U, + 13820541823134561501U, 4606957467106717424U, + 4602970680601913687U, 4605799732098147061U, + 13829171768952922869U, 4602970680601913687U, + 4605523422498301790U, 4603384207141321914U, + 13826756243996097722U, 4605523422498301790U, + 4595218635031890910U, 4607054494135176056U, + 13830426530989951864U, 4595218635031890910U, + 4607111255739239816U, 4593688012422887515U, + 
13817060049277663323U, 4607111255739239816U, + 4603694922063032361U, 4605292980606880364U, + 13828665017461656172U, 4603694922063032361U, + 4605998608960791335U, 4602598930031891166U, + 13825970966886666974U, 4605998608960791335U, + 4598423001813699022U, 4606863472012527185U, + 13830235508867302993U, 4598423001813699022U, + 4606719100629313491U, 4599374859150636784U, + 13822746896005412592U, 4606719100629313491U, + 4601721693286060937U, 4606233055365547081U, + 13829605092220322889U, 4601721693286060937U, + 4604977468824438271U, 4604079374282302598U, + 13827451411137078406U, 4604977468824438271U, + 4589744810590291021U, 4607160003989618959U, + 13830532040844394767U, 4589744810590291021U, + 4607155938267770208U, 4590185751760970393U, + 13813557788615746201U, 4607155938267770208U, + 4604037525321326463U, 4605013567986435066U, + 13828385604841210874U, 4604037525321326463U, + 4606208206518262803U, 4601820425647934753U, + 13825192462502710561U, 4606208206518262803U, + 4599269903251194481U, 4606736437002195879U, + 13830108473856971687U, 4599269903251194481U, + 4606848731493011465U, 4598529532600161144U, + 13821901569454936952U, 4606848731493011465U, + 4602502755147763107U, 4606025850160239809U, + 13829397887015015617U, 4602502755147763107U, + 4605258978359093269U, 4603738491917026584U, + 13827110528771802392U, 4605258978359093269U, + 4593265590854265407U, 4607118021058468598U, + 13830490057913244406U, 4593265590854265407U, + 4607045045516813836U, 4595436449949385485U, + 13818808486804161293U, 4607045045516813836U, + 4603339021357904144U, 4605555245917486022U, + 13828927282772261830U, 4603339021357904144U, + 4605770164172969910U, 4603017373458244943U, + 13826389410313020751U, 4605770164172969910U, + 4596954088216812973U, 4606969576261663845U, + 13830341613116439653U, 4596954088216812973U, + 4606568886807728474U, 4600206446098256018U, + 13823578482953031826U, 4606568886807728474U, + 4600921238092511730U, 4606420848538580260U, + 13829792885393356068U, 4600921238092511730U, + 
4604679572075463103U, 4604406033021674239U, + 13827778069876450047U, 4604679572075463103U, + 4581846703643734566U, 4607180341788068727U, + 13830552378642844535U, 4581846703643734566U, + 4607181359080094673U, 4579996072175835083U, + 13803368109030610891U, 4607181359080094673U, + 4604445825685214043U, 4604641218080103285U, + 13828013254934879093U, 4604445825685214043U, + 4606442934727379583U, 4600819913163773071U, + 13824191950018548879U, 4606442934727379583U, + 4600309328230211502U, 4606548680329491866U, + 13829920717184267674U, 4600309328230211502U, + 4606981354314050484U, 4596738097012783531U, + 13820110133867559339U, 4606981354314050484U, + 4603063884010218172U, 4605740310302420207U, + 13829112347157196015U, 4603063884010218172U, + 4605586791482848547U, 4603293641160266722U, + 13826665678015042530U, 4605586791482848547U, + 4595654028864046335U, 4607035262954517034U, + 13830407299809292842U, 4595654028864046335U, + 4607124449686274900U, 4592826452951465409U, + 13816198489806241217U, 4607124449686274900U, + 4603781852316960384U, 4605224709411790590U, + 13828596746266566398U, 4603781852316960384U, + 4606052795787882823U, 4602406247776385022U, + 13825778284631160830U, 4606052795787882823U, + 4598635880488956483U, 4606833664420673202U, + 13830205701275449010U, 4598635880488956483U, + 4606753451050079834U, 4599164736579548843U, + 13822536773434324651U, 4606753451050079834U, + 4601918851211878557U, 4606183055233559255U, + 13829555092088335063U, 4601918851211878557U, + 4605049409688478101U, 4603995455647851249U, + 13827367492502627057U, 4605049409688478101U, + 4590626485056654602U, 4607151534426937478U, + 13830523571281713286U, 4590626485056654602U, + 4607163731439411601U, 4589303678145802340U, + 13812675715000578148U, 4607163731439411601U, + 4604121000955189926U, 4604941113561600762U, + 13828313150416376570U, 4604121000955189926U, + 4606257600839867033U, 4601622657843474729U, + 13824994694698250537U, 4606257600839867033U, + 4599479600326345459U, 4606701442584137310U, + 
13830073479438913118U, 4599479600326345459U, + 4606877885424248132U, 4598316292140394014U, + 13821688328995169822U, 4606877885424248132U, + 4602686793990243041U, 4605971073215153165U, + 13829343110069928973U, 4602686793990243041U, + 4605326714874986465U, 4603651144395358093U, + 13827023181250133901U, 4605326714874986465U, + 4593907249284540294U, 4607104153983298999U, + 13830476190838074807U, 4593907249284540294U, + 4607063608453868552U, 4595000592312171144U, + 13818372629166946952U, 4607063608453868552U, + 4603429196809300824U, 4605491322423429598U, + 13828863359278205406U, 4603429196809300824U, + 4605829012964735987U, 4602923807199184054U, + 13826295844053959862U, 4605829012964735987U, + 4597385183080791534U, 4606945027305114062U, + 13830317064159889870U, 4597385183080791534U, + 4606608350964852124U, 4599999947619525579U, + 13823371984474301387U, 4606608350964852124U, + 4601123065313358619U, 4606375745674388705U, + 13829747782529164513U, 4601123065313358619U, + 4604755543975806820U, 4604325745441780828U, + 13827697782296556636U, 4604755543975806820U, + 4585023436363055487U, 4607177290141793710U, + 13830549326996569518U, 4585023436363055487U, + 4607175255902437396U, 4585907115494236537U, + 13809279152349012345U, 4607175255902437396U, + 4604285253548209224U, 4604793159020491611U, + 13828165195875267419U, 4604285253548209224U, + 4606352730697093817U, 4601223560006786057U, + 13824595596861561865U, 4606352730697093817U, + 4599896339047301634U, 4606627607157935956U, + 13829999644012711764U, 4599896339047301634U, + 4606932257325205256U, 4597600270510262682U, + 13820972307365038490U, 4606932257325205256U, + 4602876755014813164U, 4605858005670328613U, + 13829230042525104421U, 4602876755014813164U, + 4605458946901419122U, 4603473988668005304U, + 13826846025522781112U, 4605458946901419122U, + 4594782329999411347U, 4607072388129742377U, + 13830444424984518185U, 4594782329999411347U, + 4607096716058023245U, 4594126307716900071U, + 13817498344571675879U, 4607096716058023245U, + 
4603607160562208225U, 4605360179893335444U, + 13828732216748111252U, 4603607160562208225U, + 4605943243960030558U, 4602734543519989142U, + 13826106580374764950U, 4605943243960030558U, + 4598209407597805010U, 4606891971185517504U, + 13830264008040293312U, 4598209407597805010U, + 4606683463531482757U, 4599584122834874440U, + 13822956159689650248U, 4606683463531482757U, + 4601523323048804569U, 4606281842017099424U, + 13829653878871875232U, 4601523323048804569U, + 4604904503566677638U, 4604162403772767740U, + 13827534440627543548U, 4604904503566677638U, + 4588556721781247689U, 4607167120476811757U, + 13830539157331587565U, 4588556721781247689U, + 4607146792632922887U, 4591066993883984169U, + 13814439030738759977U, 4607146792632922887U, + 4603953166845776383U, 4605084992581147553U, + 13828457029435923361U, 4603953166845776383U, + 4606157602458368090U, 4602016966272225497U, + 13825389003127001305U, 4606157602458368090U, + 4599059363095165615U, 4606770142132396069U, + 13830142178987171877U, 4599059363095165615U, + 4606818271362779153U, 4598742041476147134U, + 13822114078330922942U, 4606818271362779153U, + 4602309411551204896U, 4606079444829232727U, + 13829451481684008535U, 4602309411551204896U, + 4605190175055178825U, 4603825001630339212U, + 13827197038485115020U, 4605190175055178825U, + 4592387007752762956U, 4607130541380624519U, + 13830502578235400327U, 4592387007752762956U, + 4607025146816593591U, 4595871363584150300U, + 13819243400438926108U, 4607025146816593591U, + 4603248068256948438U, 4605618058006716661U, + 13828990094861492469U, 4603248068256948438U, + 4605710171610479304U, 4603110210506737381U, + 13826482247361513189U, 4605710171610479304U, + 4596521820799644122U, 4606992800820440327U, + 13830364837675216135U, 4596521820799644122U, + 4606528158595189433U, 4600411960456200676U, + 13823783997310976484U, 4606528158595189433U, + 4600718319105833937U, 4606464709641375231U, + 13829836746496151039U, 4600718319105833937U, + 4604602620643553229U, 4604485382263976838U, + 
13827857419118752646U, 4604602620643553229U, + 4576459225186735875U, 4607182037296057423U, + 13830554074150833231U, 4576459225186735875U, + 4607182037296057423U, 4576459225186735875U, + 13799831262041511683U, 4607182037296057423U, + 4604485382263976838U, 4604602620643553229U, + 13827974657498329037U, 4604485382263976838U, + 4606464709641375231U, 4600718319105833937U, + 13824090355960609745U, 4606464709641375231U, + 4600411960456200676U, 4606528158595189433U, + 13829900195449965241U, 4600411960456200676U, + 4606992800820440327U, 4596521820799644122U, + 13819893857654419930U, 4606992800820440327U, + 4603110210506737381U, 4605710171610479304U, + 13829082208465255112U, 4603110210506737381U, + 4605618058006716661U, 4603248068256948438U, + 13826620105111724246U, 4605618058006716661U, + 4595871363584150300U, 4607025146816593591U, + 13830397183671369399U, 4595871363584150300U, + 4607130541380624519U, 4592387007752762956U, + 13815759044607538764U, 4607130541380624519U, + 4603825001630339212U, 4605190175055178825U, + 13828562211909954633U, 4603825001630339212U, + 4606079444829232727U, 4602309411551204896U, + 13825681448405980704U, 4606079444829232727U, + 4598742041476147134U, 4606818271362779153U, + 13830190308217554961U, 4598742041476147134U, + 4606770142132396069U, 4599059363095165615U, + 13822431399949941423U, 4606770142132396069U, + 4602016966272225497U, 4606157602458368090U, + 13829529639313143898U, 4602016966272225497U, + 4605084992581147553U, 4603953166845776383U, + 13827325203700552191U, 4605084992581147553U, + 4591066993883984169U, 4607146792632922887U, + 13830518829487698695U, 4591066993883984169U, + 4607167120476811757U, 4588556721781247689U, + 13811928758636023497U, 4607167120476811757U, + 4604162403772767740U, 4604904503566677638U, + 13828276540421453446U, 4604162403772767740U, + 4606281842017099424U, 4601523323048804569U, + 13824895359903580377U, 4606281842017099424U, + 4599584122834874440U, 4606683463531482757U, + 13830055500386258565U, 4599584122834874440U, + 
4606891971185517504U, 4598209407597805010U, + 13821581444452580818U, 4606891971185517504U, + 4602734543519989142U, 4605943243960030558U, + 13829315280814806366U, 4602734543519989142U, + 4605360179893335444U, 4603607160562208225U, + 13826979197416984033U, 4605360179893335444U, + 4594126307716900071U, 4607096716058023245U, + 13830468752912799053U, 4594126307716900071U, + 4607072388129742377U, 4594782329999411347U, + 13818154366854187155U, 4607072388129742377U, + 4603473988668005304U, 4605458946901419122U, + 13828830983756194930U, 4603473988668005304U, + 4605858005670328613U, 4602876755014813164U, + 13826248791869588972U, 4605858005670328613U, + 4597600270510262682U, 4606932257325205256U, + 13830304294179981064U, 4597600270510262682U, + 4606627607157935956U, 4599896339047301634U, + 13823268375902077442U, 4606627607157935956U, + 4601223560006786057U, 4606352730697093817U, + 13829724767551869625U, 4601223560006786057U, + 4604793159020491611U, 4604285253548209224U, + 13827657290402985032U, 4604793159020491611U, + 4585907115494236537U, 4607175255902437396U, + 13830547292757213204U, 4585907115494236537U, + 4607177290141793710U, 4585023436363055487U, + 13808395473217831295U, 4607177290141793710U, + 4604325745441780828U, 4604755543975806820U, + 13828127580830582628U, 4604325745441780828U, + 4606375745674388705U, 4601123065313358619U, + 13824495102168134427U, 4606375745674388705U, + 4599999947619525579U, 4606608350964852124U, + 13829980387819627932U, 4599999947619525579U, + 4606945027305114062U, 4597385183080791534U, + 13820757219935567342U, 4606945027305114062U, + 4602923807199184054U, 4605829012964735987U, + 13829201049819511795U, 4602923807199184054U, + 4605491322423429598U, 4603429196809300824U, + 13826801233664076632U, 4605491322423429598U, + 4595000592312171144U, 4607063608453868552U, + 13830435645308644360U, 4595000592312171144U, + 4607104153983298999U, 4593907249284540294U, + 13817279286139316102U, 4607104153983298999U, + 4603651144395358093U, 4605326714874986465U, + 
13828698751729762273U, 4603651144395358093U, + 4605971073215153165U, 4602686793990243041U, + 13826058830845018849U, 4605971073215153165U, + 4598316292140394014U, 4606877885424248132U, + 13830249922279023940U, 4598316292140394014U, + 4606701442584137310U, 4599479600326345459U, + 13822851637181121267U, 4606701442584137310U, + 4601622657843474729U, 4606257600839867033U, + 13829629637694642841U, 4601622657843474729U, + 4604941113561600762U, 4604121000955189926U, + 13827493037809965734U, 4604941113561600762U, + 4589303678145802340U, 4607163731439411601U, + 13830535768294187409U, 4589303678145802340U, + 4607151534426937478U, 4590626485056654602U, + 13813998521911430410U, 4607151534426937478U, + 4603995455647851249U, 4605049409688478101U, + 13828421446543253909U, 4603995455647851249U, + 4606183055233559255U, 4601918851211878557U, + 13825290888066654365U, 4606183055233559255U, + 4599164736579548843U, 4606753451050079834U, + 13830125487904855642U, 4599164736579548843U, + 4606833664420673202U, 4598635880488956483U, + 13822007917343732291U, 4606833664420673202U, + 4602406247776385022U, 4606052795787882823U, + 13829424832642658631U, 4602406247776385022U, + 4605224709411790590U, 4603781852316960384U, + 13827153889171736192U, 4605224709411790590U, + 4592826452951465409U, 4607124449686274900U, + 13830496486541050708U, 4592826452951465409U, + 4607035262954517034U, 4595654028864046335U, + 13819026065718822143U, 4607035262954517034U, + 4603293641160266722U, 4605586791482848547U, + 13828958828337624355U, 4603293641160266722U, + 4605740310302420207U, 4603063884010218172U, + 13826435920864993980U, 4605740310302420207U, + 4596738097012783531U, 4606981354314050484U, + 13830353391168826292U, 4596738097012783531U, + 4606548680329491866U, 4600309328230211502U, + 13823681365084987310U, 4606548680329491866U, + 4600819913163773071U, 4606442934727379583U, + 13829814971582155391U, 4600819913163773071U, + 4604641218080103285U, 4604445825685214043U, + 13827817862539989851U, 4604641218080103285U, + 
4579996072175835083U, 4607181359080094673U, + 13830553395934870481U, 4579996072175835083U, + 4607180341788068727U, 4581846703643734566U, + 13805218740498510374U, 4607180341788068727U, + 4604406033021674239U, 4604679572075463103U, + 13828051608930238911U, 4604406033021674239U, + 4606420848538580260U, 4600921238092511730U, + 13824293274947287538U, 4606420848538580260U, + 4600206446098256018U, 4606568886807728474U, + 13829940923662504282U, 4600206446098256018U, + 4606969576261663845U, 4596954088216812973U, + 13820326125071588781U, 4606969576261663845U, + 4603017373458244943U, 4605770164172969910U, + 13829142201027745718U, 4603017373458244943U, + 4605555245917486022U, 4603339021357904144U, + 13826711058212679952U, 4605555245917486022U, + 4595436449949385485U, 4607045045516813836U, + 13830417082371589644U, 4595436449949385485U, + 4607118021058468598U, 4593265590854265407U, + 13816637627709041215U, 4607118021058468598U, + 4603738491917026584U, 4605258978359093269U, + 13828631015213869077U, 4603738491917026584U, + 4606025850160239809U, 4602502755147763107U, + 13825874792002538915U, 4606025850160239809U, + 4598529532600161144U, 4606848731493011465U, + 13830220768347787273U, 4598529532600161144U, + 4606736437002195879U, 4599269903251194481U, + 13822641940105970289U, 4606736437002195879U, + 4601820425647934753U, 4606208206518262803U, + 13829580243373038611U, 4601820425647934753U, + 4605013567986435066U, 4604037525321326463U, + 13827409562176102271U, 4605013567986435066U, + 4590185751760970393U, 4607155938267770208U, + 13830527975122546016U, 4590185751760970393U, + 4607160003989618959U, 4589744810590291021U, + 13813116847445066829U, 4607160003989618959U, + 4604079374282302598U, 4604977468824438271U, + 13828349505679214079U, 4604079374282302598U, + 4606233055365547081U, 4601721693286060937U, + 13825093730140836745U, 4606233055365547081U, + 4599374859150636784U, 4606719100629313491U, + 13830091137484089299U, 4599374859150636784U, + 4606863472012527185U, 4598423001813699022U, + 
13821795038668474830U, 4606863472012527185U, + 4602598930031891166U, 4605998608960791335U, + 13829370645815567143U, 4602598930031891166U, + 4605292980606880364U, 4603694922063032361U, + 13827066958917808169U, 4605292980606880364U, + 4593688012422887515U, 4607111255739239816U, + 13830483292594015624U, 4593688012422887515U, + 4607054494135176056U, 4595218635031890910U, + 13818590671886666718U, 4607054494135176056U, + 4603384207141321914U, 4605523422498301790U, + 13828895459353077598U, 4603384207141321914U, + 4605799732098147061U, 4602970680601913687U, + 13826342717456689495U, 4605799732098147061U, + 4597169786279785693U, 4606957467106717424U, + 13830329503961493232U, 4597169786279785693U, + 4606588777269136769U, 4600103317933788342U, + 13823475354788564150U, 4606588777269136769U, + 4601022290077223616U, 4606398451906509788U, + 13829770488761285596U, 4601022290077223616U, + 4604717681185626434U, 4604366005771528720U, + 13827738042626304528U, 4604717681185626434U, + 4583614727651146525U, 4607178985458280057U, + 13830551022313055865U, 4583614727651146525U, + 4607172882816799076U, 4586790578280679046U, + 13810162615135454854U, 4607172882816799076U, + 4604244531615310815U, 4604830524903495634U, + 13828202561758271442U, 4604244531615310815U, + 4606329407841126011U, 4601323770373937522U, + 13824695807228713330U, 4606329407841126011U, + 4599792496117920694U, 4606646545123403481U, + 13830018581978179289U, 4599792496117920694U, + 4606919157647773535U, 4597815040470278984U, + 13821187077325054792U, 4606919157647773535U, + 4602829525820289164U, 4605886709123365959U, + 13829258745978141767U, 4602829525820289164U, + 4605426297151190466U, 4603518581031047189U, + 13826890617885822997U, 4605426297151190466U, + 4594563856311064231U, 4607080832832247697U, + 13830452869687023505U, 4594563856311064231U, + 4607088942243446236U, 4594345179472540681U, + 13817717216327316489U, 4607088942243446236U, + 4603562972219549215U, 4605393374401988274U, + 13828765411256764082U, 4603562972219549215U, + 
4605915122243179241U, 4602782121393764535U, + 13826154158248540343U, 4605915122243179241U, + 4598029484874872834U, 4606905728766014348U, + 13830277765620790156U, 4598029484874872834U, + 4606665164148251002U, 4599688422741010356U, + 13823060459595786164U, 4606665164148251002U, + 4601423692641949331U, 4606305777984577632U, + 13829677814839353440U, 4601423692641949331U, + 4604867640218014515U, 4604203581176243359U, + 13827575618031019167U, 4604867640218014515U, + 4587673791460508439U, 4607170170974224083U, + 13830542207828999891U, 4587673791460508439U, + 4607141713064252300U, 4591507261658050721U, + 13814879298512826529U, 4607141713064252300U, + 4603910660507251362U, 4605120315324767624U, + 13828492352179543432U, 4603910660507251362U, + 4606131849150971908U, 4602114767134999006U, + 13825486803989774814U, 4606131849150971908U, + 4598953786765296928U, 4606786509620734768U, + 13830158546475510576U, 4598953786765296928U, + 4606802552898869248U, 4598848011564831930U, + 13822220048419607738U, 4606802552898869248U, + 4602212250118051877U, 4606105796280968177U, + 13829477833135743985U, 4602212250118051877U, + 4605155376589456981U, 4603867938232615808U, + 13827239975087391616U, 4605155376589456981U, + 4591947271803021404U, 4607136295912168606U, + 13830508332766944414U, 4591947271803021404U, + 4607014697483910382U, 4596088445927168004U, + 13819460482781943812U, 4607014697483910382U, + 4603202304363743346U, 4605649044311923410U, + 13829021081166699218U, 4603202304363743346U, + 4605679749231851918U, 4603156351203636159U, + 13826528388058411967U, 4605679749231851918U, + 4596305267720071930U, 4607003915349878877U, + 13830375952204654685U, 4596305267720071930U, + 4606507322377452870U, 4600514338912178239U, + 13823886375766954047U, 4606507322377452870U, + 4600616459743653188U, 4606486172460753999U, + 13829858209315529807U, 4600616459743653188U, + 4604563781218984604U, 4604524701268679793U, + 13827896738123455601U, 4604563781218984604U, + 4569220649180767418U, 4607182376410422530U, + 
13830554413265198338U, 4569220649180767418U +}; + +const fpr fpr_p2_tab[] = { + 4611686018427387904U, + 4607182418800017408U, + 4602678819172646912U, + 4598175219545276416U, + 4593671619917905920U, + 4589168020290535424U, + 4584664420663164928U, + 4580160821035794432U, + 4575657221408423936U, + 4571153621781053440U, + 4566650022153682944U +}; diff --git a/crypto_sign/falcon/falcon-512/clean/fpr.h b/crypto_sign/falcon/falcon-512/clean/fpr.h new file mode 100644 index 00000000..f88595e2 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/fpr.h @@ -0,0 +1,473 @@ +#ifndef PQCLEAN_FALCON512_CLEAN_FPR_H +#define PQCLEAN_FALCON512_CLEAN_FPR_H + +/* + * Floating-point operations. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ====================================================================== */ +/* + * Custom floating-point implementation with integer arithmetics. We + * use IEEE-754 "binary64" format, with some simplifications: + * + * - Top bit is s = 1 for negative, 0 for positive. + * + * - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). + * + * - Mantissa m uses the 52 low bits. + * + * Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) + * i.e. the mantissa really is a 53-bit number (less than 2.0, but not + * less than 1.0), but the top bit (equal to 1 by definition) is omitted + * in the encoding. + * + * In IEEE-754, there are some special values: + * + * - If e = 2047, then the value is either an infinite (m = 0) or + * a NaN (m != 0). + * + * - If e = 0, then the value is either a zero (m = 0) or a subnormal, + * aka "denormalized number" (m != 0). + * + * Of these, we only need the zeros. The caller is responsible for not + * providing operands that would lead to infinites, NaNs or subnormals. + * If inputs are such that values go out of range, then indeterminate + * values are returned (it would still be deterministic, but no specific + * value may be relied upon). + * + * At the C level, the three parts are stored in a 64-bit unsigned + * word. + * + * One may note that a property of the IEEE-754 format is that order + * is preserved for positive values: if two positive floating-point + * values x and y are such that x < y, then their respective encodings + * as _signed_ 64-bit integers i64(x) and i64(y) will be such that + * i64(x) < i64(y). For negative values, order is reversed: if x < 0, + * y < 0, and x < y, then i64(x) > i64(y). 
+ * + * IMPORTANT ASSUMPTIONS: + * ====================== + * + * For proper computations, and constant-time behaviour, we assume the + * following: + * + * - 32x32->64 multiplication (unsigned) has an execution time that + * is independent of its operands. This is true of most modern + * x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ + * and M3 (in the M0 and M0+, this is done in software, so it depends + * on that routine), and the PowerPC cores from the G3/G4 lines. + * For more info, see: https://www.bearssl.org/ctmul.html + * + * - Left-shifts and right-shifts of 32-bit values have an execution + * time which does not depend on the shifted value nor on the + * shift count. An historical exception is the Pentium IV, but most + * modern CPU have barrel shifters. Some small microcontrollers + * might have varying-time shifts (not the ARM Cortex M*, though). + * + * - Right-shift of a signed negative value performs a sign extension. + * As per the C standard, this operation returns an + * implementation-defined result (this is NOT an "undefined + * behaviour"). On most/all systems, an arithmetic shift is + * performed, because this is what makes most sense. + */ + +/* + * Normally we should declare the 'fpr' type to be a struct or union + * around the internal 64-bit value; however, we want to use the + * direct 64-bit integer type to enable a lighter call convention on + * ARM platforms. This means that direct (invalid) use of operators + * such as '*' or '+' will not be caught by the compiler. We rely on + * the "normal" (non-emulated) code to detect such instances. + */ +typedef uint64_t fpr; + +/* + * For computations, we split values into an integral mantissa in the + * 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is + * "sticky" (it is set to 1 if any of the bits below it is 1); when + * re-encoding, the low two bits are dropped, but may induce an + * increment in the value for proper rounding. 
+ */ + +/* + * Right-shift a 64-bit unsigned value by a possibly secret shift count. + * We assumed that the underlying architecture had a barrel shifter for + * 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will + * typically invoke a software routine that is not necessarily + * constant-time; hence the function below. + * + * Shift count n MUST be in the 0..63 range. + */ +static inline uint64_t +fpr_ursh(uint64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5); + return x >> (n & 31); +} + +/* + * Right-shift a 64-bit signed value by a possibly secret shift count + * (see fpr_ursh() for the rationale). + * + * Shift count n MUST be in the 0..63 range. + */ +static inline int64_t +fpr_irsh(int64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5); + return x >> (n & 31); +} + +/* + * Left-shift a 64-bit unsigned value by a possibly secret shift count + * (see fpr_ursh() for the rationale). + * + * Shift count n MUST be in the 0..63 range. + */ +static inline uint64_t +fpr_ulsh(uint64_t x, int n) { + x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5); + return x << (n & 31); +} + +/* + * Expectations: + * s = 0 or 1 + * exponent e is "arbitrary" and unbiased + * 2^54 <= m < 2^55 + * Numerical value is (-1)^s * m * 2^e + * + * Exponents which are too low lead to value zero. If the exponent is + * too large, the returned value is indeterminate. + * + * If m = 0, then a zero is returned (using the provided sign). + * If e < -1076, then a zero is returned (regardless of the value of m). + * If e >= -1076 and e != 0, m must be within the expected range + * (2^54 to 2^55-1). + */ +static inline fpr +FPR(int s, int e, uint64_t m) { + fpr x; + uint32_t t; + unsigned f; + + /* + * If e >= -1076, then the value is "normal"; otherwise, it + * should be a subnormal, which we clamp down to zero. + */ + e += 1076; + t = (uint32_t)e >> 31; + m &= (uint64_t)t - 1; + + /* + * If m = 0 then we want a zero; make e = 0 too, but conserve + * the sign. 
+ */ + t = (uint32_t)(m >> 54); + e &= -(int)t; + + /* + * The 52 mantissa bits come from m. Value m has its top bit set + * (unless it is a zero); we leave it "as is": the top bit will + * increment the exponent by 1, except when m = 0, which is + * exactly what we want. + */ + x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); + + /* + * Rounding: if the low three bits of m are 011, 110 or 111, + * then the value should be incremented to get the next + * representable value. This implements the usual + * round-to-nearest rule (with preference to even values in case + * of a tie). Note that the increment may make a carry spill + * into the exponent field, which is again exactly what we want + * in that case. + */ + f = (unsigned)m & 7U; + x += (0xC8U >> f) & 1; + return x; +} + +#define fpr_scaled PQCLEAN_FALCON512_CLEAN_fpr_scaled +fpr fpr_scaled(int64_t i, int sc); + +static inline fpr +fpr_of(int64_t i) { + return fpr_scaled(i, 0); +} + +static const fpr fpr_q = 4667981563525332992; +static const fpr fpr_inverse_of_q = 4545632735260551042; +static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306; +static const fpr fpr_inv_sigma = 4573359825155195350; +static const fpr fpr_sigma_min_9 = 4608495221497168882; +static const fpr fpr_sigma_min_10 = 4608586345619182117; +static const fpr fpr_log2 = 4604418534313441775; +static const fpr fpr_inv_log2 = 4609176140021203710; +static const fpr fpr_bnorm_max = 4670353323383631276; +static const fpr fpr_zero = 0; +static const fpr fpr_one = 4607182418800017408; +static const fpr fpr_two = 4611686018427387904; +static const fpr fpr_onehalf = 4602678819172646912; +static const fpr fpr_invsqrt2 = 4604544271217802189; +static const fpr fpr_invsqrt8 = 4600040671590431693; +static const fpr fpr_ptwo31 = 4746794007248502784; +static const fpr fpr_ptwo31m1 = 4746794007244308480; +static const fpr fpr_mtwo31m1 = 13970166044099084288U; +static const fpr fpr_ptwo63m1 = 4890909195324358656; +static const fpr 
fpr_mtwo63m1 = 14114281232179134464U; +static const fpr fpr_ptwo63 = 4890909195324358656; + +static inline int64_t +fpr_rint(fpr x) { + uint64_t m, d; + int e; + uint32_t s, dd, f; + + /* + * We assume that the value fits in -(2^63-1)..+(2^63-1). We can + * thus extract the mantissa as a 63-bit integer, then right-shift + * it as needed. + */ + m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + e = 1085 - ((int)(x >> 52) & 0x7FF); + + /* + * If a shift of more than 63 bits is needed, then simply set m + * to zero. This also covers the case of an input operand equal + * to zero. + */ + m &= -(uint64_t)((uint32_t)(e - 64) >> 31); + e &= 63; + + /* + * Right-shift m as needed. Shift count is e. Proper rounding + * mandates that: + * - If the highest dropped bit is zero, then round low. + * - If the highest dropped bit is one, and at least one of the + * other dropped bits is one, then round up. + * - If the highest dropped bit is one, and all other dropped + * bits are zero, then round up if the lowest kept bit is 1, + * or low otherwise (i.e. ties are broken by "rounding to even"). + * + * We thus first extract a word consisting of all the dropped bit + * AND the lowest kept bit; then we shrink it down to three bits, + * the lowest being "sticky". + */ + d = fpr_ulsh(m, 63 - e); + dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); + f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); + m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); + + /* + * Apply the sign bit. + */ + s = (uint32_t)(x >> 63); + return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; +} + +static inline int64_t +fpr_floor(fpr x) { + uint64_t t; + int64_t xi; + int e, cc; + + /* + * We extract the integer as a _signed_ 64-bit integer with + * a scaling factor. Since we assume that the value fits + * in the -(2^63-1)..+(2^63-1) range, we can left-shift the + * absolute value to make it in the 2^62..2^63-1 range: we + * will only need a right-shift afterwards. 
+ */ + e = (int)(x >> 52) & 0x7FF; + t = x >> 63; + xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) + & (((uint64_t)1 << 63) - 1)); + xi = (xi ^ -(int64_t)t) + (int64_t)t; + cc = 1085 - e; + + /* + * We perform an arithmetic right-shift on the value. This + * applies floor() semantics on both positive and negative values + * (rounding toward minus infinity). + */ + xi = fpr_irsh(xi, cc & 63); + + /* + * If the true shift count was 64 or more, then we should instead + * replace xi with 0 (if nonnegative) or -1 (if negative). Edge + * case: -0 will be floored to -1, not 0 (whether this is correct + * is debatable; in any case, the other functions normalize zero + * to +0). + * + * For an input of zero, the non-shifted xi was incorrect (we used + * a top implicit bit of value 1, not 0), but this does not matter + * since this operation will clamp it down. + */ + xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); + return xi; +} + +static inline int64_t +fpr_trunc(fpr x) { + uint64_t t, xu; + int e, cc; + + /* + * Extract the absolute value. Since we assume that the value + * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift + * the absolute value into the 2^62..2^63-1 range, and then + * do a right shift afterwards. + */ + e = (int)(x >> 52) & 0x7FF; + xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + cc = 1085 - e; + xu = fpr_ursh(xu, cc & 63); + + /* + * If the exponent is too low (cc > 63), then the shift was wrong + * and we must clamp the value to 0. This also covers the case + * of an input equal to zero. + */ + xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); + + /* + * Apply back the sign, if the source value is negative. 
+ */ + t = x >> 63; + xu = (xu ^ -t) + t; + return *(int64_t *)&xu; +} + +#define fpr_add PQCLEAN_FALCON512_CLEAN_fpr_add +fpr fpr_add(fpr x, fpr y); + +static inline fpr +fpr_sub(fpr x, fpr y) { + y ^= (uint64_t)1 << 63; + return fpr_add(x, y); +} + +static inline fpr +fpr_neg(fpr x) { + x ^= (uint64_t)1 << 63; + return x; +} + +static inline fpr +fpr_half(fpr x) { + /* + * To divide a value by 2, we just have to subtract 1 from its + * exponent, but we have to take care of zero. + */ + uint32_t t; + + x -= (uint64_t)1 << 52; + t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; + x &= (uint64_t)t - 1; + return x; +} + +static inline fpr +fpr_double(fpr x) { + /* + * To double a value, we just increment by one the exponent. We + * don't care about infinites or NaNs; however, 0 is a + * special case. + */ + x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; + return x; +} + +#define fpr_mul PQCLEAN_FALCON512_CLEAN_fpr_mul +fpr fpr_mul(fpr x, fpr y); + +static inline fpr +fpr_sqr(fpr x) { + return fpr_mul(x, x); +} + +#define fpr_div PQCLEAN_FALCON512_CLEAN_fpr_div +fpr fpr_div(fpr x, fpr y); + +static inline fpr +fpr_inv(fpr x) { + return fpr_div(4607182418800017408u, x); +} + +#define fpr_sqrt PQCLEAN_FALCON512_CLEAN_fpr_sqrt +fpr fpr_sqrt(fpr x); + +static inline int +fpr_lt(fpr x, fpr y) { + /* + * If both x and y are positive, then a signed comparison yields + * the proper result: + * - For positive values, the order is preserved. + * - The sign bit is at the same place as in integers, so + * sign is preserved. + * Moreover, we can compute [x < y] as sgn(x-y) and the computation + * of x-y will not overflow. + * + * If the signs differ, then sgn(x) gives the proper result. + * + * If both x and y are negative, then the order is reversed. + * Hence [x < y] = sgn(y-x). We must compute this separately from + * sgn(x-y); simply inverting sgn(x-y) would not handle the edge + * case x = y properly. 
+ */ + int cc0, cc1; + int64_t sx; + int64_t sy; + + sx = *(int64_t *)&x; + sy = *(int64_t *)&y; + sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ + + cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ + cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ + + return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); +} + +/* + * Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 + * bits or so. + */ +#define fpr_expm_p63 PQCLEAN_FALCON512_CLEAN_fpr_expm_p63 +uint64_t fpr_expm_p63(fpr x, fpr ccs); + +#define fpr_gm_tab PQCLEAN_FALCON512_CLEAN_fpr_gm_tab +extern const fpr fpr_gm_tab[]; + +#define fpr_p2_tab PQCLEAN_FALCON512_CLEAN_fpr_p2_tab +extern const fpr fpr_p2_tab[]; + +/* ====================================================================== */ +#endif diff --git a/crypto_sign/falcon/falcon-512/clean/inner.h b/crypto_sign/falcon/falcon-512/clean/inner.h new file mode 100644 index 00000000..b81197f1 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/inner.h @@ -0,0 +1,830 @@ +#ifndef PQCLEAN_FALCON512_CLEAN_INNER_H +#define PQCLEAN_FALCON512_CLEAN_INNER_H + + +/* + * Internal functions for Falcon. This is not the API intended to be + * used by applications; instead, this internal API provides all the + * primitives on which wrappers build to provide external APIs. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +/* + * IMPORTANT API RULES + * ------------------- + * + * This API has some non-trivial usage rules: + * + * + * - All public functions (i.e. the non-static ones) must be referenced + * with the PQCLEAN_FALCON512_CLEAN_ macro (e.g. PQCLEAN_FALCON512_CLEAN_verify_raw for the verify_raw() + * function). That macro adds a prefix to the name, which is + * configurable with the FALCON_PREFIX macro. This allows compiling + * the code into a specific "namespace" and potentially including + * several versions of this code into a single application (e.g. 
to + * have AVX2 and non-AVX2 variants and select the one to use at + * runtime based on availability of AVX2 opcodes). + * + * - Functions that need temporary buffers expect them as a final + * tmp[] array of type uint8_t*, with a size which is documented for + * each function. However, most have some alignment requirements, + * because they will use the array to store 16-bit, 32-bit or 64-bit + * values (e.g. uint64_t or double). The caller must ensure proper + * alignment. What happens on unaligned access depends on the + * underlying architecture, ranging from a slight time penalty + * to immediate termination of the process. + * + * - Some functions rely on specific rounding rules and precision for + * floating-point numbers. On some systems (in particular 32-bit x86 + * with the 387 FPU), this requires setting a hardware control + * word. The caller MUST use set_fpu_cw() to ensure proper precision: + * + * oldcw = set_fpu_cw(2); + * PQCLEAN_FALCON512_CLEAN_sign_dyn(...); + * set_fpu_cw(oldcw); + * + * On systems where the native floating-point precision is already + * proper, or integer-based emulation is used, the set_fpu_cw() + * function does nothing, so it can be called systematically. + */ +#include "fips202.h" +#include "fpr.h" +#include +#include +#include + + + + + +/* + * Some computations with floating-point elements, in particular + * rounding to the nearest integer, rely on operations using _exactly_ + * the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit + * x86, the 387 FPU may be used (depending on the target OS) and, in + * that case, may use more precision bits (i.e. 64 bits, for an 80-bit + * total type length); to prevent miscomputations, we define an explicit + * function that modifies the precision in the FPU control word. + * + * set_fpu_cw() sets the precision to the provided value, and returns + * the previously set precision; callers are supposed to restore the + * previous precision on exit.
The correct (52-bit) precision is + * configured with the value "2". On unsupported compilers, or on + * targets other than 32-bit x86, or when the native 'double' type is + * not used, the set_fpu_cw() function does nothing at all. + */ +static inline unsigned +set_fpu_cw(unsigned x) { + return x; +} + + + + +/* ==================================================================== */ +/* + * SHAKE256 implementation (shake.c). + * + * API is defined to be easily replaced with the fips202.h API defined + * as part of PQClean. + */ + + + +#define inner_shake256_context shake256incctx +#define inner_shake256_init(sc) shake256_inc_init(sc) +#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) +#define inner_shake256_flip(sc) shake256_inc_finalize(sc) +#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) +#define inner_shake256_ctx_release(sc) shake256_inc_ctx_release(sc) + + +/* ==================================================================== */ +/* + * Encoding/decoding functions (codec.c). + * + * Encoding functions take as parameters an output buffer (out) with + * a given maximum length (max_out_len); returned value is the actual + * number of bytes which have been written. If the output buffer is + * not large enough, then 0 is returned (some bytes may have been + * written to the buffer). If 'out' is NULL, then 'max_out_len' is + * ignored; instead, the function computes and returns the actual + * required output length (in bytes). + * + * Decoding functions take as parameters an input buffer (in) with + * its maximum length (max_in_len); returned value is the actual number + * of bytes that have been read from the buffer. If the provided length + * is too short, then 0 is returned. + * + * Values to encode or decode are vectors of integers, with N = 2^logn + * elements. + * + * Three encoding formats are defined: + * + * - modq: sequence of values modulo 12289, each encoded over exactly + * 14 bits. 
The encoder and decoder verify that integers are within + * the valid range (0..12288). Values are arrays of uint16. + * + * - trim: sequence of signed integers, a specified number of bits + * each. The number of bits is provided as parameter and includes + * the sign bit. Each integer x must be such that |x| < 2^(bits-1) + * (which means that the -2^(bits-1) value is forbidden); encode and + * decode functions check that property. Values are arrays of + * int16_t or int8_t, corresponding to names 'trim_i16' and + * 'trim_i8', respectively. + * + * - comp: variable-length encoding for signed integers; each integer + * uses a minimum of 9 bits, possibly more. This is normally used + * only for signatures. + * + */ + +size_t PQCLEAN_FALCON512_CLEAN_modq_encode(void *out, size_t max_out_len, + const uint16_t *x, unsigned logn); +size_t PQCLEAN_FALCON512_CLEAN_trim_i16_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON512_CLEAN_trim_i8_encode(void *out, size_t max_out_len, + const int8_t *x, unsigned logn, unsigned bits); +size_t PQCLEAN_FALCON512_CLEAN_comp_encode(void *out, size_t max_out_len, + const int16_t *x, unsigned logn); + +size_t PQCLEAN_FALCON512_CLEAN_modq_decode(uint16_t *x, unsigned logn, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON512_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON512_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, + const void *in, size_t max_in_len); +size_t PQCLEAN_FALCON512_CLEAN_comp_decode(int16_t *x, unsigned logn, + const void *in, size_t max_in_len); + +/* + * Number of bits for key elements, indexed by logn (1 to 10). This + * is at most 8 bits for all degrees, but some degrees may have shorter + * elements. 
+ */ +extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_fg_bits[]; +extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_FG_bits[]; + +/* + * Maximum size, in bits, of elements in a signature, indexed by logn + * (1 to 10). The size includes the sign bit. + */ +extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[]; + +/* ==================================================================== */ +/* + * Support functions used for both signature generation and signature + * verification (common.c). + */ + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. This is the non-constant-time version, which may leak enough + * information to serve as a stop condition on a brute force attack on + * the hashed message (provided that the nonce value is known). + */ +void PQCLEAN_FALCON512_CLEAN_hash_to_point_vartime(inner_shake256_context *sc, + uint16_t *x, unsigned logn); + +/* + * From a SHAKE256 context (must be already flipped), produce a new + * point. The temporary buffer (tmp) must have room for 2*2^logn bytes. + * This function is constant-time but is typically more expensive than + * PQCLEAN_FALCON512_CLEAN_hash_to_point_vartime(). + * + * tmp[] must have 16-bit alignment. + */ +void PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(inner_shake256_context *sc, + uint16_t *x, unsigned logn, uint8_t *tmp); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. This compares the appropriate norm of the + * vector with the acceptance bound. Returned value is 1 on success + * (vector is short enough to be acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON512_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); + +/* + * Tell whether a given vector (2N coordinates, in two halves) is + * acceptable as a signature. Instead of the first half s1, this + * function receives the "saturated squared norm" of s1, i.e. 
the + * sum of the squares of the coordinates of s1 (saturated at 2^32-1 + * if the sum exceeds 2^31-1). + * + * Returned value is 1 on success (vector is short enough to be + * acceptable), 0 otherwise. + */ +int PQCLEAN_FALCON512_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); + +/* ==================================================================== */ +/* + * Signature verification functions (vrfy.c). + */ + +/* + * Convert a public key to NTT + Montgomery format. Conversion is done + * in place. + */ +void PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); + +/* + * Internal signature verification code: + * c0[] contains the hashed nonce+message + * s2[] is the decoded signature + * h[] contains the public key, in NTT + Montgomery format + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp); + +/* + * Compute the public key h[], given the private key elements f[] and + * g[]. This computes h = g/f mod phi mod q, where phi is the polynomial + * modulus. This function returns 1 on success, 0 on error (an error is + * reported if f is not invertible mod phi mod q). + * + * The tmp[] array must have room for at least 2*2^logn elements. + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); + +/* + * Recompute the fourth private key element. Private key consists in + * four polynomials with small coefficients f, g, F and G, which are + * such that fG - gF = q mod phi; furthermore, f is invertible modulo + * phi and modulo q. This function recomputes G from f, g and F. + * + * The tmp[] array must have room for at least 4*2^logn bytes. 
+ * + * Returned value is 1 on success, 0 on error (f not invertible). + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp); + +/* + * Test whether a given polynomial is invertible modulo phi and q. + * Polynomial coefficients are small integers. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_CLEAN_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp); + +/* + * Count the number of elements of value zero in the NTT representation + * of the given polynomial: this is the number of primitive 2n-th roots + * of unity (modulo q = 12289) that are roots of the provided polynomial + * (taken modulo q). + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp); + +/* + * Internal signature verification with public key recovery: + * h[] receives the public key (NOT in NTT/Montgomery format) + * c0[] contains the hashed nonce+message + * s1[] is the first signature half + * s2[] is the second signature half + * logn is the degree log + * tmp[] temporary, must have at least 2*2^logn bytes + * Returned value is 1 on success, 0 on error. Success is returned if + * the signature is a short enough vector; in that case, the public + * key has been written to h[]. However, the caller must still + * verify that h[] is the correct value (e.g. with regards to a known + * hash of the public key). + * + * h[] may not overlap with any of the other arrays. + * + * tmp[] must have 16-bit alignment. + */ +int PQCLEAN_FALCON512_CLEAN_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Implementation of floating-point real numbers (fpr.h, fpr.c).
+ */ + +/* + * Real numbers are implemented by an extra header file, included above. + * This is meant to support pluggable implementations. The default + * implementation relies on the C type 'double'. + * + * The included file must define the following types, functions and + * constants: + * + * fpr + * type for a real number + * + * fpr fpr_of(int64_t i) + * cast an integer into a real number; source must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_scaled(int64_t i, int sc) + * compute i*2^sc as a real number; source 'i' must be in the + * -(2^63-1)..+(2^63-1) range + * + * fpr fpr_ldexp(fpr x, int e) + * compute x*2^e + * + * int64_t fpr_rint(fpr x) + * round x to the nearest integer; x must be in the -(2^63-1) + * to +(2^63-1) range + * + * int64_t fpr_trunc(fpr x) + * round to an integer; this rounds towards zero; value must + * be in the -(2^63-1) to +(2^63-1) range + * + * fpr fpr_add(fpr x, fpr y) + * compute x + y + * + * fpr fpr_sub(fpr x, fpr y) + * compute x - y + * + * fpr fpr_neg(fpr x) + * compute -x + * + * fpr fpr_half(fpr x) + * compute x/2 + * + * fpr fpr_double(fpr x) + * compute x*2 + * + * fpr fpr_mul(fpr x, fpr y) + * compute x * y + * + * fpr fpr_sqr(fpr x) + * compute x * x + * + * fpr fpr_inv(fpr x) + * compute 1/x + * + * fpr fpr_div(fpr x, fpr y) + * compute x/y + * + * fpr fpr_sqrt(fpr x) + * compute the square root of x + * + * int fpr_lt(fpr x, fpr y) + * return 1 if x < y, 0 otherwise + * + * uint64_t fpr_expm_p63(fpr x, fpr ccs) + * return exp(-x), assuming that 0 <= x < log(2). Returned value + * is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), + * rounded to the nearest integer). Computation should have a + * precision of at least 45 bits.
+ * + * const fpr fpr_gm_tab[] + * array of constants for FFT / iFFT + * + * const fpr fpr_p2_tab[] + * precomputed powers of 2 (by index, 0 to 10) + * + * Constants of type 'fpr': + * + * fpr fpr_q 12289 + * fpr fpr_inverse_of_q 1/12289 + * fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) + * fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) + * fpr fpr_sigma_min_9 1.291500756233514568549480827642 + * fpr fpr_sigma_min_10 1.311734375905083682667395805765 + * fpr fpr_log2 log(2) + * fpr fpr_inv_log2 1/log(2) + * fpr fpr_bnorm_max 16822.4121 + * fpr fpr_zero 0 + * fpr fpr_one 1 + * fpr fpr_two 2 + * fpr fpr_onehalf 0.5 + * fpr fpr_ptwo31 2^31 + * fpr fpr_ptwo31m1 2^31-1 + * fpr fpr_mtwo31m1 -(2^31-1) + * fpr fpr_ptwo63m1 2^63-1 + * fpr fpr_mtwo63m1 -(2^63-1) + * fpr fpr_ptwo63 2^63 + */ + +/* ==================================================================== */ +/* + * RNG (rng.c). + * + * A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 + * context (flipped) and is used for bulk pseudorandom generation. + * A system-dependent seed generator is also provided. + */ + +/* + * Obtain a random seed from the system RNG. + * + * Returned value is 1 on success, 0 on error. + */ +int PQCLEAN_FALCON512_CLEAN_get_seed(void *seed, size_t seed_len); + +/* + * Structure for a PRNG. This includes a large buffer so that values + * get generated in advance. The 'state' is used to keep the current + * PRNG algorithm state (contents depend on the selected algorithm). + * + * The unions with 'dummy_u64' are there to ensure proper alignment for + * 64-bit direct access. + */ +typedef struct { + union { + uint8_t d[512]; /* MUST be 512, exactly */ + uint64_t dummy_u64; + } buf; + size_t ptr; + union { + uint8_t d[256]; + uint64_t dummy_u64; + } state; + int type; +} prng; + +/* + * Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 + * context (in "flipped" state) to obtain its initial state. 
+ */ +void PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, inner_shake256_context *src); + +/* + * Refill the PRNG buffer. This is normally invoked automatically, and + * is declared here only so that prng_get_u64() and prng_get_u8() may be inlined. + */ +void PQCLEAN_FALCON512_CLEAN_prng_refill(prng *p); + +/* + * Get some bytes from a PRNG. + */ +void PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); + +/* + * Get a 64-bit random value from a PRNG (8 buffer bytes, little-endian). + */ +static inline uint64_t +prng_get_u64(prng *p) { + size_t u; + + /* + * If there are 9 bytes or fewer left in the buffer, we refill it. + * This means that we may drop the last few bytes, but this allows + * for faster extraction code. Also, it means that we never leave + * an empty buffer. + */ + u = p->ptr; + if (u >= (sizeof p->buf.d) - 9) { + PQCLEAN_FALCON512_CLEAN_prng_refill(p); + u = 0; + } + p->ptr = u + 8; + + return (uint64_t)p->buf.d[u + 0] + | ((uint64_t)p->buf.d[u + 1] << 8) + | ((uint64_t)p->buf.d[u + 2] << 16) + | ((uint64_t)p->buf.d[u + 3] << 24) + | ((uint64_t)p->buf.d[u + 4] << 32) + | ((uint64_t)p->buf.d[u + 5] << 40) + | ((uint64_t)p->buf.d[u + 6] << 48) + | ((uint64_t)p->buf.d[u + 7] << 56); +} + +/* + * Get an 8-bit random value from a PRNG. + */ +static inline unsigned +prng_get_u8(prng *p) { + unsigned v; + + v = p->buf.d[p->ptr ++]; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON512_CLEAN_prng_refill(p); + } + return v; +} + +/* ==================================================================== */ +/* + * FFT (falcon-fft.c). + * + * A real polynomial is represented as an array of N 'fpr' elements. + * The FFT representation of a real polynomial contains N/2 complex + * elements; each is stored as two real numbers, for the real and + * imaginary parts, respectively. See falcon-fft.c for details on the + * internal representation.
+ */ + +/* + * Compute FFT in-place: the source array should contain a real + * polynomial (N coefficients); its storage area is reused to store + * the FFT representation of that polynomial (N/2 complex numbers). + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON512_CLEAN_FFT(fpr *f, unsigned logn); + +/* + * Compute the inverse FFT in-place: the source array should contain the + * FFT representation of a real polynomial (N/2 elements); the resulting + * real polynomial (N coefficients of type 'fpr') is written over the + * array. + * + * 'logn' MUST lie between 1 and 10 (inclusive). + */ +void PQCLEAN_FALCON512_CLEAN_iFFT(fpr *f, unsigned logn); + +/* + * Add polynomial b to polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON512_CLEAN_poly_add(fpr *a, const fpr *b, unsigned logn); + +/* + * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This + * function works in both normal and FFT representations. + */ +void PQCLEAN_FALCON512_CLEAN_poly_sub(fpr *a, const fpr *b, unsigned logn); + +/* + * Negate polynomial a. This function works in both normal and FFT + * representations. + */ +void PQCLEAN_FALCON512_CLEAN_poly_neg(fpr *a, unsigned logn); + +/* + * Compute adjoint of polynomial a. This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON512_CLEAN_poly_adj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial a with polynomial b. a and b MUST NOT overlap. + * This function works only in FFT representation. + */ +void PQCLEAN_FALCON512_CLEAN_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT + * overlap. This function works only in FFT representation. + */ +void PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Multiply polynomial with its own adjoint. 
This function works only in FFT + * representation. + */ +void PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); + +/* + * Multiply polynomial with a real constant. This function works in both + * normal and FFT representations. + */ +void PQCLEAN_FALCON512_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); + +/* + * Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON512_CLEAN_poly_div_fft(fpr *a, const fpr *b, unsigned logn); + +/* + * Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) + * (also in FFT representation). Since the result is auto-adjoint, all its + * coordinates in FFT representation are real; as such, only the first N/2 + * values of d[] are filled (the imaginary parts are skipped). + * + * Array d MUST NOT overlap with either a or b. + */ +void PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(fpr *d, + const fpr *a, const fpr *b, unsigned logn); + +/* + * Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) + * (also in FFT representation). Destination d MUST NOT overlap with + * any of the source arrays. + */ +void PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(fpr *d, + const fpr *F, const fpr *G, + const fpr *f, const fpr *g, unsigned logn); + +/* + * Multiply polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. + */ +void PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Divide polynomial a by polynomial b, where b is autoadjoint. Both + * a and b are in FFT representation. Since b is autoadjoint, all its + * FFT coefficients are real, and the array b contains only N/2 elements. + * a and b MUST NOT overlap. 
+ */ +void PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft(fpr *a, + const fpr *b, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. On input, g00, g01 and g11 are provided (where the + * matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 + * and d11 values are written in g00, g01 and g11, respectively + * (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). + * (In fact, d00 = g00, so the g00 operand is left unmodified.) + */ +void PQCLEAN_FALCON512_CLEAN_poly_LDL_fft(const fpr *g00, + fpr *g01, fpr *g11, unsigned logn); + +/* + * Perform an LDL decomposition of an auto-adjoint matrix G, in FFT + * representation. This is identical to poly_LDL_fft() except that + * g00, g01 and g11 are unmodified; the outputs d11 and l10 are written + * in two other separate buffers provided as extra parameters. + */ +void PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft(fpr *d11, fpr *l10, + const fpr *g00, const fpr *g01, + const fpr *g11, unsigned logn); + +/* + * Apply "split" operation on a polynomial in FFT representation: + * f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 + * (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. + */ +void PQCLEAN_FALCON512_CLEAN_poly_split_fft(fpr *f0, fpr *f1, + const fpr *f, unsigned logn); + +/* + * Apply "merge" operation on two polynomials in FFT representation: + * given f0 and f1, polynomials modulo X^(N/2)+1, this function computes + * f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. + * f MUST NOT overlap with either f0 or f1. + */ +void PQCLEAN_FALCON512_CLEAN_poly_merge_fft(fpr *f, + const fpr *f0, const fpr *f1, unsigned logn); + +/* ==================================================================== */ +/* + * Key pair generation. + */ + +/* + * Required sizes of the temporary buffer (in bytes). + * + * This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1 + * or 2) where it is slightly greater.
+ */ +#define FALCON_KEYGEN_TEMP_1 136 +#define FALCON_KEYGEN_TEMP_2 272 +#define FALCON_KEYGEN_TEMP_3 224 +#define FALCON_KEYGEN_TEMP_4 448 +#define FALCON_KEYGEN_TEMP_5 896 +#define FALCON_KEYGEN_TEMP_6 1792 +#define FALCON_KEYGEN_TEMP_7 3584 +#define FALCON_KEYGEN_TEMP_8 7168 +#define FALCON_KEYGEN_TEMP_9 14336 +#define FALCON_KEYGEN_TEMP_10 28672 + +/* + * Generate a new key pair. Randomness is extracted from the provided + * SHAKE256 context, which must have already been seeded and flipped. + * The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* + * macros) and be aligned for the uint32_t, uint64_t and fpr types. + * + * The private key elements are written in f, g, F and G, and the + * public key is written in h. Either or both of G and h may be NULL, + * in which case the corresponding element is not returned (they can + * be recomputed from f, g and F). + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_CLEAN_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp); + +/* ==================================================================== */ +/* + * Signature generation. + */ + +/* + * Expand a private key into the B0 matrix in FFT representation and + * the LDL tree. All the values are written in 'expanded_key', for + * a total of (8*logn+40)*2^logn bytes. + * + * The tmp[] array must have room for at least 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *expanded_key, + const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, + unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. 
This function uses an + * expanded key (as generated by PQCLEAN_FALCON512_CLEAN_expand_privkey()). + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 48*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Compute a signature over the provided hashed message (hm); the + * signature value is one short vector. This function uses a raw + * key and dynamically recomputes the B0 matrix and LDL tree; this + * saves RAM since there is no need for an expanded key, but + * increases the signature cost. + * + * The sig[] and hm[] buffers may overlap. + * + * On successful output, the start of the tmp[] buffer contains the s1 + * vector (as int16_t elements). + * + * The minimal size (in bytes) of tmp[] is 72*2^logn bytes. + * + * tmp[] must have 64-bit alignment. + * This function uses floating-point rounding (see set_fpu_cw()). + */ +void PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp); + +/* + * Internal sampler engine. Exported for tests. + * + * sampler_context wraps around a source of random numbers (PRNG) and + * the sigma_min value (nominally dependent on the degree). + * + * sampler() takes as parameters: + * ctx pointer to the sampler_context structure + * mu center for the distribution + * isigma inverse of the distribution standard deviation + * It returns an integer sampled along the Gaussian distribution centered + * on mu and of standard deviation sigma = 1/isigma.
+ * + * gaussian0_sampler() takes as parameter a pointer to a PRNG, and + * returns an integer sampled along a half-Gaussian with standard + * deviation sigma0 = 1.8205 (center is 0, returned value is + * nonnegative). + */ + +typedef struct { + prng p; + fpr sigma_min; +} sampler_context; + +int PQCLEAN_FALCON512_CLEAN_sampler(void *ctx, fpr mu, fpr isigma); + +int PQCLEAN_FALCON512_CLEAN_gaussian0_sampler(prng *p); + +/* ==================================================================== */ + +#endif diff --git a/crypto_sign/falcon/falcon-512/clean/keygen.c b/crypto_sign/falcon/falcon-512/clean/keygen.c new file mode 100644 index 00000000..f72ecd99 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/keygen.c @@ -0,0 +1,4231 @@ +#include "inner.h" + +/* + * Falcon key pair generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +#define MKN(logn) ((size_t)1 << (logn)) + +/* ==================================================================== */ +/* + * Modular arithmetics. + * + * We implement a few functions for computing modulo a small integer p. + * + * All functions require that 2^30 < p < 2^31. Moreover, operands must + * be in the 0..p-1 range. + * + * Modular addition and subtraction work for all such p. + * + * Montgomery multiplication requires that p is odd, and must be provided + * with an additional value p0i = -1/p mod 2^31. See below for some basics + * on Montgomery multiplication. + * + * Division computes an inverse modulo p by an exponentiation (with + * exponent p-2): this works only if p is prime. Multiplication + * requirements also apply, i.e. p must be odd and p0i must be provided. + * + * The NTT and inverse NTT need all of the above, and also that + * p = 1 mod 2048. + * + * ----------------------------------------------------------------------- + * + * We use Montgomery representation with 31-bit values: + * + * Let R = 2^31 mod p. When 2^30 < p < 2^31, R = 2^31 - p. + * Montgomery representation of an integer x modulo p is x*R mod p. + * + * Montgomery multiplication computes (x*y)/R mod p for + * operands x and y. 
Therefore: + * + * - if operands are x*R and y*R (Montgomery representations of x and + * y), then Montgomery multiplication computes (x*R*y*R)/R = (x*y)*R + * mod p, which is the Montgomery representation of the product x*y; + * + * - if operands are x*R and y (or x and y*R), then Montgomery + * multiplication returns x*y mod p: mixed-representation + * multiplications yield results in normal representation. + * + * To convert to Montgomery representation, we multiply by R, which is done + * by Montgomery-multiplying by R^2. Stand-alone conversion back from + * Montgomery representation is Montgomery-multiplication by 1. + */ + +/* + * Precomputed small primes. Each element contains the following: + * + * p The prime itself. + * + * g A primitive root of phi = X^N+1 (in field Z_p). + * + * s The inverse of the product of all previous primes in the array, + * computed modulo p and in Montgomery representation. + * + * All primes are such that p = 1 mod 2048, and are lower than 2^31. They + * are listed in decreasing order. 
+ */ + +typedef struct { + uint32_t p; + uint32_t g; + uint32_t s; +} small_prime; + +static const small_prime PRIMES[] = { + { 2147473409, 383167813, 10239 }, + { 2147389441, 211808905, 471403745 }, + { 2147387393, 37672282, 1329335065 }, + { 2147377153, 1977035326, 968223422 }, + { 2147358721, 1067163706, 132460015 }, + { 2147352577, 1606082042, 598693809 }, + { 2147346433, 2033915641, 1056257184 }, + { 2147338241, 1653770625, 421286710 }, + { 2147309569, 631200819, 1111201074 }, + { 2147297281, 2038364663, 1042003613 }, + { 2147295233, 1962540515, 19440033 }, + { 2147239937, 2100082663, 353296760 }, + { 2147235841, 1991153006, 1703918027 }, + { 2147217409, 516405114, 1258919613 }, + { 2147205121, 409347988, 1089726929 }, + { 2147196929, 927788991, 1946238668 }, + { 2147178497, 1136922411, 1347028164 }, + { 2147100673, 868626236, 701164723 }, + { 2147082241, 1897279176, 617820870 }, + { 2147074049, 1888819123, 158382189 }, + { 2147051521, 25006327, 522758543 }, + { 2147043329, 327546255, 37227845 }, + { 2147039233, 766324424, 1133356428 }, + { 2146988033, 1862817362, 73861329 }, + { 2146963457, 404622040, 653019435 }, + { 2146959361, 1936581214, 995143093 }, + { 2146938881, 1559770096, 634921513 }, + { 2146908161, 422623708, 1985060172 }, + { 2146885633, 1751189170, 298238186 }, + { 2146871297, 578919515, 291810829 }, + { 2146846721, 1114060353, 915902322 }, + { 2146834433, 2069565474, 47859524 }, + { 2146818049, 1552824584, 646281055 }, + { 2146775041, 1906267847, 1597832891 }, + { 2146756609, 1847414714, 1228090888 }, + { 2146744321, 1818792070, 1176377637 }, + { 2146738177, 1118066398, 1054971214 }, + { 2146736129, 52057278, 933422153 }, + { 2146713601, 592259376, 1406621510 }, + { 2146695169, 263161877, 1514178701 }, + { 2146656257, 685363115, 384505091 }, + { 2146650113, 927727032, 537575289 }, + { 2146646017, 52575506, 1799464037 }, + { 2146643969, 1276803876, 1348954416 }, + { 2146603009, 814028633, 1521547704 }, + { 2146572289, 1846678872, 1310832121 }, + 
{ 2146547713, 919368090, 1019041349 }, + { 2146508801, 671847612, 38582496 }, + { 2146492417, 283911680, 532424562 }, + { 2146490369, 1780044827, 896447978 }, + { 2146459649, 327980850, 1327906900 }, + { 2146447361, 1310561493, 958645253 }, + { 2146441217, 412148926, 287271128 }, + { 2146437121, 293186449, 2009822534 }, + { 2146430977, 179034356, 1359155584 }, + { 2146418689, 1517345488, 1790248672 }, + { 2146406401, 1615820390, 1584833571 }, + { 2146404353, 826651445, 607120498 }, + { 2146379777, 3816988, 1897049071 }, + { 2146363393, 1221409784, 1986921567 }, + { 2146355201, 1388081168, 849968120 }, + { 2146336769, 1803473237, 1655544036 }, + { 2146312193, 1023484977, 273671831 }, + { 2146293761, 1074591448, 467406983 }, + { 2146283521, 831604668, 1523950494 }, + { 2146203649, 712865423, 1170834574 }, + { 2146154497, 1764991362, 1064856763 }, + { 2146142209, 627386213, 1406840151 }, + { 2146127873, 1638674429, 2088393537 }, + { 2146099201, 1516001018, 690673370 }, + { 2146093057, 1294931393, 315136610 }, + { 2146091009, 1942399533, 973539425 }, + { 2146078721, 1843461814, 2132275436 }, + { 2146060289, 1098740778, 360423481 }, + { 2146048001, 1617213232, 1951981294 }, + { 2146041857, 1805783169, 2075683489 }, + { 2146019329, 272027909, 1753219918 }, + { 2145986561, 1206530344, 2034028118 }, + { 2145976321, 1243769360, 1173377644 }, + { 2145964033, 887200839, 1281344586 }, + { 2145906689, 1651026455, 906178216 }, + { 2145875969, 1673238256, 1043521212 }, + { 2145871873, 1226591210, 1399796492 }, + { 2145841153, 1465353397, 1324527802 }, + { 2145832961, 1150638905, 554084759 }, + { 2145816577, 221601706, 427340863 }, + { 2145785857, 608896761, 316590738 }, + { 2145755137, 1712054942, 1684294304 }, + { 2145742849, 1302302867, 724873116 }, + { 2145728513, 516717693, 431671476 }, + { 2145699841, 524575579, 1619722537 }, + { 2145691649, 1925625239, 982974435 }, + { 2145687553, 463795662, 1293154300 }, + { 2145673217, 771716636, 881778029 }, + { 2145630209, 1509556977, 
837364988 }, + { 2145595393, 229091856, 851648427 }, + { 2145587201, 1796903241, 635342424 }, + { 2145525761, 715310882, 1677228081 }, + { 2145495041, 1040930522, 200685896 }, + { 2145466369, 949804237, 1809146322 }, + { 2145445889, 1673903706, 95316881 }, + { 2145390593, 806941852, 1428671135 }, + { 2145372161, 1402525292, 159350694 }, + { 2145361921, 2124760298, 1589134749 }, + { 2145359873, 1217503067, 1561543010 }, + { 2145355777, 338341402, 83865711 }, + { 2145343489, 1381532164, 641430002 }, + { 2145325057, 1883895478, 1528469895 }, + { 2145318913, 1335370424, 65809740 }, + { 2145312769, 2000008042, 1919775760 }, + { 2145300481, 961450962, 1229540578 }, + { 2145282049, 910466767, 1964062701 }, + { 2145232897, 816527501, 450152063 }, + { 2145218561, 1435128058, 1794509700 }, + { 2145187841, 33505311, 1272467582 }, + { 2145181697, 269767433, 1380363849 }, + { 2145175553, 56386299, 1316870546 }, + { 2145079297, 2106880293, 1391797340 }, + { 2145021953, 1347906152, 720510798 }, + { 2145015809, 206769262, 1651459955 }, + { 2145003521, 1885513236, 1393381284 }, + { 2144960513, 1810381315, 31937275 }, + { 2144944129, 1306487838, 2019419520 }, + { 2144935937, 37304730, 1841489054 }, + { 2144894977, 1601434616, 157985831 }, + { 2144888833, 98749330, 2128592228 }, + { 2144880641, 1772327002, 2076128344 }, + { 2144864257, 1404514762, 2029969964 }, + { 2144827393, 801236594, 406627220 }, + { 2144806913, 349217443, 1501080290 }, + { 2144796673, 1542656776, 2084736519 }, + { 2144778241, 1210734884, 1746416203 }, + { 2144759809, 1146598851, 716464489 }, + { 2144757761, 286328400, 1823728177 }, + { 2144729089, 1347555695, 1836644881 }, + { 2144727041, 1795703790, 520296412 }, + { 2144696321, 1302475157, 852964281 }, + { 2144667649, 1075877614, 504992927 }, + { 2144573441, 198765808, 1617144982 }, + { 2144555009, 321528767, 155821259 }, + { 2144550913, 814139516, 1819937644 }, + { 2144536577, 571143206, 962942255 }, + { 2144524289, 1746733766, 2471321 }, + { 2144512001, 
1821415077, 124190939 }, + { 2144468993, 917871546, 1260072806 }, + { 2144458753, 378417981, 1569240563 }, + { 2144421889, 175229668, 1825620763 }, + { 2144409601, 1699216963, 351648117 }, + { 2144370689, 1071885991, 958186029 }, + { 2144348161, 1763151227, 540353574 }, + { 2144335873, 1060214804, 919598847 }, + { 2144329729, 663515846, 1448552668 }, + { 2144327681, 1057776305, 590222840 }, + { 2144309249, 1705149168, 1459294624 }, + { 2144296961, 325823721, 1649016934 }, + { 2144290817, 738775789, 447427206 }, + { 2144243713, 962347618, 893050215 }, + { 2144237569, 1655257077, 900860862 }, + { 2144161793, 242206694, 1567868672 }, + { 2144155649, 769415308, 1247993134 }, + { 2144137217, 320492023, 515841070 }, + { 2144120833, 1639388522, 770877302 }, + { 2144071681, 1761785233, 964296120 }, + { 2144065537, 419817825, 204564472 }, + { 2144028673, 666050597, 2091019760 }, + { 2144010241, 1413657615, 1518702610 }, + { 2143952897, 1238327946, 475672271 }, + { 2143940609, 307063413, 1176750846 }, + { 2143918081, 2062905559, 786785803 }, + { 2143899649, 1338112849, 1562292083 }, + { 2143891457, 68149545, 87166451 }, + { 2143885313, 921750778, 394460854 }, + { 2143854593, 719766593, 133877196 }, + { 2143836161, 1149399850, 1861591875 }, + { 2143762433, 1848739366, 1335934145 }, + { 2143756289, 1326674710, 102999236 }, + { 2143713281, 808061791, 1156900308 }, + { 2143690753, 388399459, 1926468019 }, + { 2143670273, 1427891374, 1756689401 }, + { 2143666177, 1912173949, 986629565 }, + { 2143645697, 2041160111, 371842865 }, + { 2143641601, 1279906897, 2023974350 }, + { 2143635457, 720473174, 1389027526 }, + { 2143621121, 1298309455, 1732632006 }, + { 2143598593, 1548762216, 1825417506 }, + { 2143567873, 620475784, 1073787233 }, + { 2143561729, 1932954575, 949167309 }, + { 2143553537, 354315656, 1652037534 }, + { 2143541249, 577424288, 1097027618 }, + { 2143531009, 357862822, 478640055 }, + { 2143522817, 2017706025, 1550531668 }, + { 2143506433, 2078127419, 1824320165 }, + { 
2143488001, 613475285, 1604011510 }, + { 2143469569, 1466594987, 502095196 }, + { 2143426561, 1115430331, 1044637111 }, + { 2143383553, 9778045, 1902463734 }, + { 2143377409, 1557401276, 2056861771 }, + { 2143363073, 652036455, 1965915971 }, + { 2143260673, 1464581171, 1523257541 }, + { 2143246337, 1876119649, 764541916 }, + { 2143209473, 1614992673, 1920672844 }, + { 2143203329, 981052047, 2049774209 }, + { 2143160321, 1847355533, 728535665 }, + { 2143129601, 965558457, 603052992 }, + { 2143123457, 2140817191, 8348679 }, + { 2143100929, 1547263683, 694209023 }, + { 2143092737, 643459066, 1979934533 }, + { 2143082497, 188603778, 2026175670 }, + { 2143062017, 1657329695, 377451099 }, + { 2143051777, 114967950, 979255473 }, + { 2143025153, 1698431342, 1449196896 }, + { 2143006721, 1862741675, 1739650365 }, + { 2142996481, 756660457, 996160050 }, + { 2142976001, 927864010, 1166847574 }, + { 2142965761, 905070557, 661974566 }, + { 2142916609, 40932754, 1787161127 }, + { 2142892033, 1987985648, 675335382 }, + { 2142885889, 797497211, 1323096997 }, + { 2142871553, 2068025830, 1411877159 }, + { 2142861313, 1217177090, 1438410687 }, + { 2142830593, 409906375, 1767860634 }, + { 2142803969, 1197788993, 359782919 }, + { 2142785537, 643817365, 513932862 }, + { 2142779393, 1717046338, 218943121 }, + { 2142724097, 89336830, 416687049 }, + { 2142707713, 5944581, 1356813523 }, + { 2142658561, 887942135, 2074011722 }, + { 2142638081, 151851972, 1647339939 }, + { 2142564353, 1691505537, 1483107336 }, + { 2142533633, 1989920200, 1135938817 }, + { 2142529537, 959263126, 1531961857 }, + { 2142527489, 453251129, 1725566162 }, + { 2142502913, 1536028102, 182053257 }, + { 2142498817, 570138730, 701443447 }, + { 2142416897, 326965800, 411931819 }, + { 2142363649, 1675665410, 1517191733 }, + { 2142351361, 968529566, 1575712703 }, + { 2142330881, 1384953238, 1769087884 }, + { 2142314497, 1977173242, 1833745524 }, + { 2142289921, 95082313, 1714775493 }, + { 2142283777, 109377615, 1070584533 
}, + { 2142277633, 16960510, 702157145 }, + { 2142263297, 553850819, 431364395 }, + { 2142208001, 241466367, 2053967982 }, + { 2142164993, 1795661326, 1031836848 }, + { 2142097409, 1212530046, 712772031 }, + { 2142087169, 1763869720, 822276067 }, + { 2142078977, 644065713, 1765268066 }, + { 2142074881, 112671944, 643204925 }, + { 2142044161, 1387785471, 1297890174 }, + { 2142025729, 783885537, 1000425730 }, + { 2142011393, 905662232, 1679401033 }, + { 2141974529, 799788433, 468119557 }, + { 2141943809, 1932544124, 449305555 }, + { 2141933569, 1527403256, 841867925 }, + { 2141931521, 1247076451, 743823916 }, + { 2141902849, 1199660531, 401687910 }, + { 2141890561, 150132350, 1720336972 }, + { 2141857793, 1287438162, 663880489 }, + { 2141833217, 618017731, 1819208266 }, + { 2141820929, 999578638, 1403090096 }, + { 2141786113, 81834325, 1523542501 }, + { 2141771777, 120001928, 463556492 }, + { 2141759489, 122455485, 2124928282 }, + { 2141749249, 141986041, 940339153 }, + { 2141685761, 889088734, 477141499 }, + { 2141673473, 324212681, 1122558298 }, + { 2141669377, 1175806187, 1373818177 }, + { 2141655041, 1113654822, 296887082 }, + { 2141587457, 991103258, 1585913875 }, + { 2141583361, 1401451409, 1802457360 }, + { 2141575169, 1571977166, 712760980 }, + { 2141546497, 1107849376, 1250270109 }, + { 2141515777, 196544219, 356001130 }, + { 2141495297, 1733571506, 1060744866 }, + { 2141483009, 321552363, 1168297026 }, + { 2141458433, 505818251, 733225819 }, + { 2141360129, 1026840098, 948342276 }, + { 2141325313, 945133744, 2129965998 }, + { 2141317121, 1871100260, 1843844634 }, + { 2141286401, 1790639498, 1750465696 }, + { 2141267969, 1376858592, 186160720 }, + { 2141255681, 2129698296, 1876677959 }, + { 2141243393, 2138900688, 1340009628 }, + { 2141214721, 1933049835, 1087819477 }, + { 2141212673, 1898664939, 1786328049 }, + { 2141202433, 990234828, 940682169 }, + { 2141175809, 1406392421, 993089586 }, + { 2141165569, 1263518371, 289019479 }, + { 2141073409, 1485624211, 
507864514 }, + { 2141052929, 1885134788, 311252465 }, + { 2141040641, 1285021247, 280941862 }, + { 2141028353, 1527610374, 375035110 }, + { 2141011969, 1400626168, 164696620 }, + { 2140999681, 632959608, 966175067 }, + { 2140997633, 2045628978, 1290889438 }, + { 2140993537, 1412755491, 375366253 }, + { 2140942337, 719477232, 785367828 }, + { 2140925953, 45224252, 836552317 }, + { 2140917761, 1157376588, 1001839569 }, + { 2140887041, 278480752, 2098732796 }, + { 2140837889, 1663139953, 924094810 }, + { 2140788737, 802501511, 2045368990 }, + { 2140766209, 1820083885, 1800295504 }, + { 2140764161, 1169561905, 2106792035 }, + { 2140696577, 127781498, 1885987531 }, + { 2140684289, 16014477, 1098116827 }, + { 2140653569, 665960598, 1796728247 }, + { 2140594177, 1043085491, 377310938 }, + { 2140579841, 1732838211, 1504505945 }, + { 2140569601, 302071939, 358291016 }, + { 2140567553, 192393733, 1909137143 }, + { 2140557313, 406595731, 1175330270 }, + { 2140549121, 1748850918, 525007007 }, + { 2140477441, 499436566, 1031159814 }, + { 2140469249, 1886004401, 1029951320 }, + { 2140426241, 1483168100, 1676273461 }, + { 2140420097, 1779917297, 846024476 }, + { 2140413953, 522948893, 1816354149 }, + { 2140383233, 1931364473, 1296921241 }, + { 2140366849, 1917356555, 147196204 }, + { 2140354561, 16466177, 1349052107 }, + { 2140348417, 1875366972, 1860485634 }, + { 2140323841, 456498717, 1790256483 }, + { 2140321793, 1629493973, 150031888 }, + { 2140315649, 1904063898, 395510935 }, + { 2140280833, 1784104328, 831417909 }, + { 2140250113, 256087139, 697349101 }, + { 2140229633, 388553070, 243875754 }, + { 2140223489, 747459608, 1396270850 }, + { 2140200961, 507423743, 1895572209 }, + { 2140162049, 580106016, 2045297469 }, + { 2140149761, 712426444, 785217995 }, + { 2140137473, 1441607584, 536866543 }, + { 2140119041, 346538902, 1740434653 }, + { 2140090369, 282642885, 21051094 }, + { 2140076033, 1407456228, 319910029 }, + { 2140047361, 1619330500, 1488632070 }, + { 2140041217, 
2089408064, 2012026134 }, + { 2140008449, 1705524800, 1613440760 }, + { 2139924481, 1846208233, 1280649481 }, + { 2139906049, 989438755, 1185646076 }, + { 2139867137, 1522314850, 372783595 }, + { 2139842561, 1681587377, 216848235 }, + { 2139826177, 2066284988, 1784999464 }, + { 2139824129, 480888214, 1513323027 }, + { 2139789313, 847937200, 858192859 }, + { 2139783169, 1642000434, 1583261448 }, + { 2139770881, 940699589, 179702100 }, + { 2139768833, 315623242, 964612676 }, + { 2139666433, 331649203, 764666914 }, + { 2139641857, 2118730799, 1313764644 }, + { 2139635713, 519149027, 519212449 }, + { 2139598849, 1526413634, 1769667104 }, + { 2139574273, 551148610, 820739925 }, + { 2139568129, 1386800242, 472447405 }, + { 2139549697, 813760130, 1412328531 }, + { 2139537409, 1615286260, 1609362979 }, + { 2139475969, 1352559299, 1696720421 }, + { 2139455489, 1048691649, 1584935400 }, + { 2139432961, 836025845, 950121150 }, + { 2139424769, 1558281165, 1635486858 }, + { 2139406337, 1728402143, 1674423301 }, + { 2139396097, 1727715782, 1483470544 }, + { 2139383809, 1092853491, 1741699084 }, + { 2139369473, 690776899, 1242798709 }, + { 2139351041, 1768782380, 2120712049 }, + { 2139334657, 1739968247, 1427249225 }, + { 2139332609, 1547189119, 623011170 }, + { 2139310081, 1346827917, 1605466350 }, + { 2139303937, 369317948, 828392831 }, + { 2139301889, 1560417239, 1788073219 }, + { 2139283457, 1303121623, 595079358 }, + { 2139248641, 1354555286, 573424177 }, + { 2139240449, 60974056, 885781403 }, + { 2139222017, 355573421, 1221054839 }, + { 2139215873, 566477826, 1724006500 }, + { 2139150337, 871437673, 1609133294 }, + { 2139144193, 1478130914, 1137491905 }, + { 2139117569, 1854880922, 964728507 }, + { 2139076609, 202405335, 756508944 }, + { 2139062273, 1399715741, 884826059 }, + { 2139045889, 1051045798, 1202295476 }, + { 2139033601, 1707715206, 632234634 }, + { 2139006977, 2035853139, 231626690 }, + { 2138951681, 183867876, 838350879 }, + { 2138945537, 1403254661, 404460202 
}, + { 2138920961, 310865011, 1282911681 }, + { 2138910721, 1328496553, 103472415 }, + { 2138904577, 78831681, 993513549 }, + { 2138902529, 1319697451, 1055904361 }, + { 2138816513, 384338872, 1706202469 }, + { 2138810369, 1084868275, 405677177 }, + { 2138787841, 401181788, 1964773901 }, + { 2138775553, 1850532988, 1247087473 }, + { 2138767361, 874261901, 1576073565 }, + { 2138757121, 1187474742, 993541415 }, + { 2138748929, 1782458888, 1043206483 }, + { 2138744833, 1221500487, 800141243 }, + { 2138738689, 413465368, 1450660558 }, + { 2138695681, 739045140, 342611472 }, + { 2138658817, 1355845756, 672674190 }, + { 2138644481, 608379162, 1538874380 }, + { 2138632193, 1444914034, 686911254 }, + { 2138607617, 484707818, 1435142134 }, + { 2138591233, 539460669, 1290458549 }, + { 2138572801, 2093538990, 2011138646 }, + { 2138552321, 1149786988, 1076414907 }, + { 2138546177, 840688206, 2108985273 }, + { 2138533889, 209669619, 198172413 }, + { 2138523649, 1975879426, 1277003968 }, + { 2138490881, 1351891144, 1976858109 }, + { 2138460161, 1817321013, 1979278293 }, + { 2138429441, 1950077177, 203441928 }, + { 2138400769, 908970113, 628395069 }, + { 2138398721, 219890864, 758486760 }, + { 2138376193, 1306654379, 977554090 }, + { 2138351617, 298822498, 2004708503 }, + { 2138337281, 441457816, 1049002108 }, + { 2138320897, 1517731724, 1442269609 }, + { 2138290177, 1355911197, 1647139103 }, + { 2138234881, 531313247, 1746591962 }, + { 2138214401, 1899410930, 781416444 }, + { 2138202113, 1813477173, 1622508515 }, + { 2138191873, 1086458299, 1025408615 }, + { 2138183681, 1998800427, 827063290 }, + { 2138173441, 1921308898, 749670117 }, + { 2138103809, 1620902804, 2126787647 }, + { 2138099713, 828647069, 1892961817 }, + { 2138085377, 179405355, 1525506535 }, + { 2138060801, 615683235, 1259580138 }, + { 2138044417, 2030277840, 1731266562 }, + { 2138042369, 2087222316, 1627902259 }, + { 2138032129, 126388712, 1108640984 }, + { 2138011649, 715026550, 1017980050 }, + { 2137993217, 
1693714349, 1351778704 }, + { 2137888769, 1289762259, 1053090405 }, + { 2137853953, 199991890, 1254192789 }, + { 2137833473, 941421685, 896995556 }, + { 2137817089, 750416446, 1251031181 }, + { 2137792513, 798075119, 368077456 }, + { 2137786369, 878543495, 1035375025 }, + { 2137767937, 9351178, 1156563902 }, + { 2137755649, 1382297614, 1686559583 }, + { 2137724929, 1345472850, 1681096331 }, + { 2137704449, 834666929, 630551727 }, + { 2137673729, 1646165729, 1892091571 }, + { 2137620481, 778943821, 48456461 }, + { 2137618433, 1730837875, 1713336725 }, + { 2137581569, 805610339, 1378891359 }, + { 2137538561, 204342388, 1950165220 }, + { 2137526273, 1947629754, 1500789441 }, + { 2137516033, 719902645, 1499525372 }, + { 2137491457, 230451261, 556382829 }, + { 2137440257, 979573541, 412760291 }, + { 2137374721, 927841248, 1954137185 }, + { 2137362433, 1243778559, 861024672 }, + { 2137313281, 1341338501, 980638386 }, + { 2137311233, 937415182, 1793212117 }, + { 2137255937, 795331324, 1410253405 }, + { 2137243649, 150756339, 1966999887 }, + { 2137182209, 163346914, 1939301431 }, + { 2137171969, 1952552395, 758913141 }, + { 2137159681, 570788721, 218668666 }, + { 2137147393, 1896656810, 2045670345 }, + { 2137141249, 358493842, 518199643 }, + { 2137139201, 1505023029, 674695848 }, + { 2137133057, 27911103, 830956306 }, + { 2137122817, 439771337, 1555268614 }, + { 2137116673, 790988579, 1871449599 }, + { 2137110529, 432109234, 811805080 }, + { 2137102337, 1357900653, 1184997641 }, + { 2137098241, 515119035, 1715693095 }, + { 2137090049, 408575203, 2085660657 }, + { 2137085953, 2097793407, 1349626963 }, + { 2137055233, 1556739954, 1449960883 }, + { 2137030657, 1545758650, 1369303716 }, + { 2136987649, 332602570, 103875114 }, + { 2136969217, 1499989506, 1662964115 }, + { 2136924161, 857040753, 4738842 }, + { 2136895489, 1948872712, 570436091 }, + { 2136893441, 58969960, 1568349634 }, + { 2136887297, 2127193379, 273612548 }, + { 2136850433, 111208983, 1181257116 }, + { 
2136809473, 1627275942, 1680317971 }, + { 2136764417, 1574888217, 14011331 }, + { 2136741889, 14011055, 1129154251 }, + { 2136727553, 35862563, 1838555253 }, + { 2136721409, 310235666, 1363928244 }, + { 2136698881, 1612429202, 1560383828 }, + { 2136649729, 1138540131, 800014364 }, + { 2136606721, 602323503, 1433096652 }, + { 2136563713, 182209265, 1919611038 }, + { 2136555521, 324156477, 165591039 }, + { 2136549377, 195513113, 217165345 }, + { 2136526849, 1050768046, 939647887 }, + { 2136508417, 1886286237, 1619926572 }, + { 2136477697, 609647664, 35065157 }, + { 2136471553, 679352216, 1452259468 }, + { 2136457217, 128630031, 824816521 }, + { 2136422401, 19787464, 1526049830 }, + { 2136420353, 698316836, 1530623527 }, + { 2136371201, 1651862373, 1804812805 }, + { 2136334337, 326596005, 336977082 }, + { 2136322049, 63253370, 1904972151 }, + { 2136297473, 312176076, 172182411 }, + { 2136248321, 381261841, 369032670 }, + { 2136242177, 358688773, 1640007994 }, + { 2136229889, 512677188, 75585225 }, + { 2136219649, 2095003250, 1970086149 }, + { 2136207361, 1909650722, 537760675 }, + { 2136176641, 1334616195, 1533487619 }, + { 2136158209, 2096285632, 1793285210 }, + { 2136143873, 1897347517, 293843959 }, + { 2136133633, 923586222, 1022655978 }, + { 2136096769, 1464868191, 1515074410 }, + { 2136094721, 2020679520, 2061636104 }, + { 2136076289, 290798503, 1814726809 }, + { 2136041473, 156415894, 1250757633 }, + { 2135996417, 297459940, 1132158924 }, + { 2135955457, 538755304, 1688831340 }, + { 0, 0, 0 } +}; + +/* + * Reduce a small signed integer modulo a small prime. The source + * value x MUST be such that -p < x < p. + * + * The returned value is in the 0..p-1 range: p is added back exactly + * when x is negative, using a mask built from the sign bit rather + * than a branch. + */ +static inline uint32_t +modp_set(int32_t x, uint32_t p) { + uint32_t w; + + w = (uint32_t)x; + w += p & -(w >> 31); + return w; +} + +/* + * Normalize a modular integer around 0. + * + * The input x is expected in the 0..p-1 range, with p odd. Values of + * at least (p + 1) / 2 have p subtracted, so the result lies between + * -(p - 1) / 2 and +(p - 1) / 2. The selection is done with a mask, + * not a branch. + */ +static inline int32_t +modp_norm(uint32_t x, uint32_t p) { + return (int32_t)(x - (p & (((x - ((p + 1) >> 1)) >> 31) - 1))); +} + +/* + * Compute -1/p mod 2^31. 
/*
 * Compute -1/p mod 2^31. This works for all odd integers p that fit
 * on 31 bits.
 *
 * This is a Newton iteration: 2 - p is an inverse of p modulo 4, and
 * each "y *= 2 - p * y" step doubles the number of correct low bits
 * (4, 8, 16, then 32 bits). The final negation and mask turn 1/p into
 * -1/p truncated to 31 bits.
 */
static uint32_t
modp_ninv31(uint32_t p) {
    uint32_t y;

    y = 2 - p;
    y *= 2 - p * y;
    y *= 2 - p * y;
    y *= 2 - p * y;
    y *= 2 - p * y;
    return (uint32_t)0x7FFFFFFF & -y;
}

/*
 * Compute R = 2^31 mod p.
 */
static inline uint32_t
modp_R(uint32_t p) {
    /*
     * Since 2^30 < p < 2^31, we know that 2^31 mod p is simply
     * 2^31 - p.
     */
    return ((uint32_t)1 << 31) - p;
}

/*
 * Addition modulo p. Operands a and b must be in the 0..p-1 range;
 * the result is in the same range.
 */
static inline uint32_t
modp_add(uint32_t a, uint32_t b, uint32_t p) {
    uint32_t d;

    d = a + b - p;
    /* If a + b < p the subtraction wrapped (top bit set): add p back. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Subtraction modulo p. Operands a and b must be in the 0..p-1 range;
 * the result is in the same range.
 */
static inline uint32_t
modp_sub(uint32_t a, uint32_t b, uint32_t p) {
    uint32_t d;

    d = a - b;
    /* If a < b the subtraction wrapped (top bit set): add p back. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Halving modulo p.
 */
/* unused
static inline uint32_t
modp_half(uint32_t a, uint32_t p)
{
    a += p & -(a & 1);
    return a >> 1;
}
*/

/*
 * Montgomery multiplication modulo p. The 'p0i' value is -1/p mod 2^31.
 * It is required that p is an odd integer.
 *
 * For a and b in the 0..p-1 range, the returned value is
 * a*b/2^31 mod p, also in the 0..p-1 range.
 */
static inline uint32_t
modp_montymul(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i) {
    uint64_t z, w;
    uint32_t d;

    z = (uint64_t)a * (uint64_t)b;
    /*
     * w is the multiple of p such that z + w = 0 mod 2^31, which
     * makes the division by 2^31 below exact.
     */
    w = ((z * p0i) & (uint64_t)0x7FFFFFFF) * p;
    d = (uint32_t)((z + w) >> 31) - p;
    /* The quotient is less than 2*p: one conditional correction. */
    d += p & -(d >> 31);
    return d;
}

/*
 * Compute R2 = 2^62 mod p.
 */
static uint32_t
modp_R2(uint32_t p, uint32_t p0i) {
    uint32_t z;

    /*
     * Compute z = 2^31 mod p (this is the value 1 in Montgomery
     * representation), then double it with an addition.
     */
    z = modp_R(p);
    z = modp_add(z, z, p);

    /*
     * Square it five times to obtain 2^32 in Montgomery representation
     * (i.e. 2^63 mod p).
     */
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);
    z = modp_montymul(z, z, p, p0i);

    /*
     * Halve the value mod p to get 2^62.
     */
    z = (z + (p & -(z & 1))) >> 1;
    return z;
}

/*
 * Compute 2^(31*x) modulo p. This works for integers x up to 2^11.
 * p must be prime such that 2^30 < p < 2^31; p0i must be equal to
 * -1/p mod 2^31; R2 must be equal to 2^62 mod p.
 *
 * For x = 0 the function returns 1 (= 2^0 mod p). The exponent is
 * consumed with right shifts, so no shift count ever depends on x.
 */
static inline uint32_t
modp_Rx(unsigned x, uint32_t p, uint32_t p0i, uint32_t R2) {
    unsigned e;
    uint32_t r, z;

    /*
     * Without this guard, the decrement below would wrap around and
     * the square-and-multiply loop would run on a bogus exponent.
     */
    if (x == 0) {
        return 1;
    }

    /*
     * 2^(31*x) = (2^31)*(2^(31*(x-1))); i.e. we want the Montgomery
     * representation of (2^31)^e mod p, where e = x-1.
     * R2 is 2^31 in Montgomery representation.
     */
    e = x - 1;
    r = R2;
    z = modp_R(p);
    while (e != 0) {
        if ((e & 1U) != 0) {
            z = modp_montymul(z, r, p, p0i);
        }
        r = modp_montymul(r, r, p, p0i);
        e >>= 1;
    }
    return z;
}

/*
 * Division modulo p. If the divisor (b) is 0, then 0 is returned.
 * This function computes proper results only when p is prime.
 * Parameters:
 *   a     dividend
 *   b     divisor
 *   p     odd prime modulus
 *   p0i   -1/p mod 2^31
 *   R     2^31 mod p
 */
static uint32_t
modp_div(uint32_t a, uint32_t b, uint32_t p, uint32_t p0i, uint32_t R) {
    uint32_t z, e;
    int i;

    /*
     * Inversion is an exponentiation to the power p-2. Each iteration
     * computes both candidates and selects one with a mask.
     */
    e = p - 2;
    z = R;
    for (i = 30; i >= 0; i --) {
        uint32_t z2;

        z = modp_montymul(z, z, p, p0i);
        z2 = modp_montymul(z, b, p, p0i);
        z ^= (z ^ z2) & -(uint32_t)((e >> i) & 1);
    }

    /*
     * The loop above just assumed that b was in Montgomery
     * representation, i.e. really contained b*R; under that
     * assumption, it returns 1/b in Montgomery representation,
     * which is R/b. But we gave it b in normal representation,
     * so the loop really returned R/(b/R) = R^2/b.
     *
     * We want a/b, so we need one Montgomery multiplication with a,
     * which also removes one of the R factors, and another such
     * multiplication to remove the second R factor.
     */
    z = modp_montymul(z, 1, p, p0i);
    return modp_montymul(a, z, p, p0i);
}

/*
 * Bit-reversal index table.
 */
+ */ +static const uint16_t REV10[] = { + 0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, + 192, 704, 448, 960, 32, 544, 288, 800, 160, 672, 416, 928, + 96, 608, 352, 864, 224, 736, 480, 992, 16, 528, 272, 784, + 144, 656, 400, 912, 80, 592, 336, 848, 208, 720, 464, 976, + 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880, + 240, 752, 496, 1008, 8, 520, 264, 776, 136, 648, 392, 904, + 72, 584, 328, 840, 200, 712, 456, 968, 40, 552, 296, 808, + 168, 680, 424, 936, 104, 616, 360, 872, 232, 744, 488, 1000, + 24, 536, 280, 792, 152, 664, 408, 920, 88, 600, 344, 856, + 216, 728, 472, 984, 56, 568, 312, 824, 184, 696, 440, 952, + 120, 632, 376, 888, 248, 760, 504, 1016, 4, 516, 260, 772, + 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, + 36, 548, 292, 804, 164, 676, 420, 932, 100, 612, 356, 868, + 228, 740, 484, 996, 20, 532, 276, 788, 148, 660, 404, 916, + 84, 596, 340, 852, 212, 724, 468, 980, 52, 564, 308, 820, + 180, 692, 436, 948, 116, 628, 372, 884, 244, 756, 500, 1012, + 12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332, 844, + 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, + 108, 620, 364, 876, 236, 748, 492, 1004, 28, 540, 284, 796, + 156, 668, 412, 924, 92, 604, 348, 860, 220, 732, 476, 988, + 60, 572, 316, 828, 188, 700, 444, 956, 124, 636, 380, 892, + 252, 764, 508, 1020, 2, 514, 258, 770, 130, 642, 386, 898, + 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290, 802, + 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994, + 18, 530, 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, + 210, 722, 466, 978, 50, 562, 306, 818, 178, 690, 434, 946, + 114, 626, 370, 882, 242, 754, 498, 1010, 10, 522, 266, 778, + 138, 650, 394, 906, 74, 586, 330, 842, 202, 714, 458, 970, + 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362, 874, + 234, 746, 490, 1002, 26, 538, 282, 794, 154, 666, 410, 922, + 90, 602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, + 186, 698, 442, 954, 122, 634, 378, 890, 250, 762, 506, 1018, + 6, 
518, 262, 774, 134, 646, 390, 902, 70, 582, 326, 838, + 198, 710, 454, 966, 38, 550, 294, 806, 166, 678, 422, 934, + 102, 614, 358, 870, 230, 742, 486, 998, 22, 534, 278, 790, + 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, + 54, 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, + 246, 758, 502, 1014, 14, 526, 270, 782, 142, 654, 398, 910, + 78, 590, 334, 846, 206, 718, 462, 974, 46, 558, 302, 814, + 174, 686, 430, 942, 110, 622, 366, 878, 238, 750, 494, 1006, + 30, 542, 286, 798, 158, 670, 414, 926, 94, 606, 350, 862, + 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446, 958, + 126, 638, 382, 894, 254, 766, 510, 1022, 1, 513, 257, 769, + 129, 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, + 33, 545, 289, 801, 161, 673, 417, 929, 97, 609, 353, 865, + 225, 737, 481, 993, 17, 529, 273, 785, 145, 657, 401, 913, + 81, 593, 337, 849, 209, 721, 465, 977, 49, 561, 305, 817, + 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009, + 9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, + 201, 713, 457, 969, 41, 553, 297, 809, 169, 681, 425, 937, + 105, 617, 361, 873, 233, 745, 489, 1001, 25, 537, 281, 793, + 153, 665, 409, 921, 89, 601, 345, 857, 217, 729, 473, 985, + 57, 569, 313, 825, 185, 697, 441, 953, 121, 633, 377, 889, + 249, 761, 505, 1017, 5, 517, 261, 773, 133, 645, 389, 901, + 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, + 165, 677, 421, 933, 101, 613, 357, 869, 229, 741, 485, 997, + 21, 533, 277, 789, 149, 661, 405, 917, 85, 597, 341, 853, + 213, 725, 469, 981, 53, 565, 309, 821, 181, 693, 437, 949, + 117, 629, 373, 885, 245, 757, 501, 1013, 13, 525, 269, 781, + 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461, 973, + 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, + 237, 749, 493, 1005, 29, 541, 285, 797, 157, 669, 413, 925, + 93, 605, 349, 861, 221, 733, 477, 989, 61, 573, 317, 829, + 189, 701, 445, 957, 125, 637, 381, 893, 253, 765, 509, 1021, + 3, 515, 259, 771, 131, 643, 387, 899, 67, 579, 
323, 835, + 195, 707, 451, 963, 35, 547, 291, 803, 163, 675, 419, 931, + 99, 611, 355, 867, 227, 739, 483, 995, 19, 531, 275, 787, + 147, 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, + 51, 563, 307, 819, 179, 691, 435, 947, 115, 627, 371, 883, + 243, 755, 499, 1011, 11, 523, 267, 779, 139, 651, 395, 907, + 75, 587, 331, 843, 203, 715, 459, 971, 43, 555, 299, 811, + 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491, 1003, + 27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, + 219, 731, 475, 987, 59, 571, 315, 827, 187, 699, 443, 955, + 123, 635, 379, 891, 251, 763, 507, 1019, 7, 519, 263, 775, + 135, 647, 391, 903, 71, 583, 327, 839, 199, 711, 455, 967, + 39, 551, 295, 807, 167, 679, 423, 935, 103, 615, 359, 871, + 231, 743, 487, 999, 23, 535, 279, 791, 151, 663, 407, 919, + 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, + 183, 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015, + 15, 527, 271, 783, 143, 655, 399, 911, 79, 591, 335, 847, + 207, 719, 463, 975, 47, 559, 303, 815, 175, 687, 431, 943, + 111, 623, 367, 879, 239, 751, 495, 1007, 31, 543, 287, 799, + 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479, 991, + 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, + 255, 767, 511, 1023 +}; + +/* + * Compute the roots for NTT and inverse NTT (binary case). Input + * parameter g is a primitive 2048-th root of 1 modulo p (i.e. g^1024 = + * -1 mod p). This fills gm[] and igm[] with powers of g and 1/g: + * gm[rev(i)] = g^i mod p + * igm[rev(i)] = (1/g)^i mod p + * where rev() is the "bit reversal" function over 10 bits. It fills + * the arrays only up to N = 2^logn values. + * + * The values stored in gm[] and igm[] are in Montgomery representation. + * + * p must be a prime such that p = 1 mod 2048. 
+ */ +static void +modp_mkgm2(uint32_t *gm, uint32_t *igm, unsigned logn, + uint32_t g, uint32_t p, uint32_t p0i) { + size_t u, n; + unsigned k; + uint32_t ig, x1, x2, R2; + + n = (size_t)1 << logn; + + /* + * We want g such that g^(2N) = 1 mod p, but the provided + * generator has order 2048. We must square it a few times. + */ + R2 = modp_R2(p, p0i); + g = modp_montymul(g, R2, p, p0i); + for (k = logn; k < 10; k ++) { + g = modp_montymul(g, g, p, p0i); + } + + ig = modp_div(R2, g, p, p0i, modp_R(p)); + k = 10 - logn; + x1 = x2 = modp_R(p); + for (u = 0; u < n; u ++) { + size_t v; + + v = REV10[u << k]; + gm[v] = x1; + igm[v] = x2; + x1 = modp_montymul(x1, g, p, p0i); + x2 = modp_montymul(x2, ig, p, p0i); + } +} + +/* + * Compute the NTT over a polynomial (binary case). Polynomial elements + * are a[0], a[stride], a[2 * stride]... + */ +static void +modp_NTT2_ext(uint32_t *a, size_t stride, const uint32_t *gm, unsigned logn, + uint32_t p, uint32_t p0i) { + size_t t, m, n; + + if (logn == 0) { + return; + } + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, u, v1; + + ht = t >> 1; + for (u = 0, v1 = 0; u < m; u ++, v1 += t) { + uint32_t s; + size_t v; + uint32_t *r1, *r2; + + s = gm[m + u]; + r1 = a + v1 * stride; + r2 = r1 + ht * stride; + for (v = 0; v < ht; v ++, r1 += stride, r2 += stride) { + uint32_t x, y; + + x = *r1; + y = modp_montymul(*r2, s, p, p0i); + *r1 = modp_add(x, y, p); + *r2 = modp_sub(x, y, p); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT over a polynomial (binary case). 
+ */ +static void +modp_iNTT2_ext(uint32_t *a, size_t stride, const uint32_t *igm, unsigned logn, + uint32_t p, uint32_t p0i) { + size_t t, m, n, k; + uint32_t ni; + uint32_t *r; + + if (logn == 0) { + return; + } + n = (size_t)1 << logn; + t = 1; + for (m = n; m > 1; m >>= 1) { + size_t hm, dt, u, v1; + + hm = m >> 1; + dt = t << 1; + for (u = 0, v1 = 0; u < hm; u ++, v1 += dt) { + uint32_t s; + size_t v; + uint32_t *r1, *r2; + + s = igm[hm + u]; + r1 = a + v1 * stride; + r2 = r1 + t * stride; + for (v = 0; v < t; v ++, r1 += stride, r2 += stride) { + uint32_t x, y; + + x = *r1; + y = *r2; + *r1 = modp_add(x, y, p); + *r2 = modp_montymul( + modp_sub(x, y, p), s, p, p0i);; + } + } + t = dt; + } + + /* + * We need 1/n in Montgomery representation, i.e. R/n. Since + * 1 <= logn <= 10, R/n is an integer; morever, R/n <= 2^30 < p, + * thus a simple shift will do. + */ + ni = (uint32_t)1 << (31 - logn); + for (k = 0, r = a; k < n; k ++, r += stride) { + *r = modp_montymul(*r, ni, p, p0i); + } +} + +/* + * Simplified macros for NTT and iNTT (binary case) when the elements + * are consecutive in RAM. + */ +#define modp_NTT2(a, gm, logn, p, p0i) modp_NTT2_ext(a, 1, gm, logn, p, p0i) +#define modp_iNTT2(a, igm, logn, p, p0i) modp_iNTT2_ext(a, 1, igm, logn, p, p0i) + +/* + * Given polynomial f in NTT representation modulo p, compute f' of degree + * less than N/2 such that f' = f0^2 - X*f1^2, where f0 and f1 are + * polynomials of degree less than N/2 such that f = f0(X^2) + X*f1(X^2). + * + * The new polynomial is written "in place" over the first N/2 elements + * of f. + * + * If applied logn times successively on a given polynomial, the resulting + * degree-0 polynomial is the resultant of f and X^N+1 modulo p. + * + * This function applies only to the binary case; it is invoked from + * solve_NTRU_binary_depth1(). 
+ */ +static void +modp_poly_rec_res(uint32_t *f, unsigned logn, + uint32_t p, uint32_t p0i, uint32_t R2) { + size_t hn, u; + + hn = (size_t)1 << (logn - 1); + for (u = 0; u < hn; u ++) { + uint32_t w0, w1; + + w0 = f[(u << 1) + 0]; + w1 = f[(u << 1) + 1]; + f[u] = modp_montymul(modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } +} + +/* ==================================================================== */ +/* + * Custom bignum implementation. + * + * This is a very reduced set of functionalities. We need to do the + * following operations: + * + * - Rebuild the resultant and the polynomial coefficients from their + * values modulo small primes (of length 31 bits each). + * + * - Compute an extended GCD between the two computed resultants. + * + * - Extract top bits and add scaled values during the successive steps + * of Babai rounding. + * + * When rebuilding values using CRT, we must also recompute the product + * of the small prime factors. We always do it one small factor at a + * time, so the "complicated" operations can be done modulo the small + * prime with the modp_* functions. CRT coefficients (inverses) are + * precomputed. + * + * All values are positive until the last step: when the polynomial + * coefficients have been rebuilt, we normalize them around 0. But then, + * only additions and subtractions on the upper few bits are needed + * afterwards. + * + * We keep big integers as arrays of 31-bit words (in uint32_t values); + * the top bit of each uint32_t is kept equal to 0. Using 31-bit words + * makes it easier to keep track of carries. When negative values are + * used, two's complement is used. + */ + +/* + * Subtract integer b from integer a. Both integers are supposed to have + * the same size. The carry (0 or 1) is returned. Source arrays a and b + * MUST be distinct. + * + * The operation is performed as described above if ctr = 1. 
If + * ctl = 0, the value a[] is unmodified, but all memory accesses are + * still performed, and the carry is computed and returned. + */ +static uint32_t +zint_sub(uint32_t *a, const uint32_t *b, size_t len, + uint32_t ctl) { + size_t u; + uint32_t cc, m; + + cc = 0; + m = -ctl; + for (u = 0; u < len; u ++) { + uint32_t aw, w; + + aw = a[u]; + w = aw - b[u] - cc; + cc = w >> 31; + aw ^= ((w & 0x7FFFFFFF) ^ aw) & m; + a[u] = aw; + } + return cc; +} + +/* + * Mutiply the provided big integer m with a small value x. + * This function assumes that x < 2^31. The carry word is returned. + */ +static uint32_t +zint_mul_small(uint32_t *m, size_t mlen, uint32_t x) { + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < mlen; u ++) { + uint64_t z; + + z = (uint64_t)m[u] * (uint64_t)x + cc; + m[u] = (uint32_t)z & 0x7FFFFFFF; + cc = (uint32_t)(z >> 31); + } + return cc; +} + +/* + * Reduce a big integer d modulo a small integer p. + * Rules: + * d is unsigned + * p is prime + * 2^30 < p < 2^31 + * p0i = -(1/p) mod 2^31 + * R2 = 2^62 mod p + */ +static uint32_t +zint_mod_small_unsigned(const uint32_t *d, size_t dlen, + uint32_t p, uint32_t p0i, uint32_t R2) { + uint32_t x; + size_t u; + + /* + * Algorithm: we inject words one by one, starting with the high + * word. Each step is: + * - multiply x by 2^31 + * - add new word + */ + x = 0; + u = dlen; + while (u -- > 0) { + uint32_t w; + + x = modp_montymul(x, R2, p, p0i); + w = d[u] - p; + w += p & -(w >> 31); + x = modp_add(x, w, p); + } + return x; +} + +/* + * Similar to zint_mod_small_unsigned(), except that d may be signed. + * Extra parameter is Rx = 2^(31*dlen) mod p. + */ +static uint32_t +zint_mod_small_signed(const uint32_t *d, size_t dlen, + uint32_t p, uint32_t p0i, uint32_t R2, uint32_t Rx) { + uint32_t z; + + if (dlen == 0) { + return 0; + } + z = zint_mod_small_unsigned(d, dlen, p, p0i, R2); + z = modp_sub(z, Rx & -(d[dlen - 1] >> 30), p); + return z; +} + +/* + * Add y*s to x. 
x and y initially have length 'len' words; the new x + * has length 'len+1' words. 's' must fit on 31 bits. x[] and y[] must + * not overlap. + */ +static void +zint_add_mul_small(uint32_t *x, + const uint32_t *y, size_t len, uint32_t s) { + size_t u; + uint32_t cc; + + cc = 0; + for (u = 0; u < len; u ++) { + uint32_t xw, yw; + uint64_t z; + + xw = x[u]; + yw = y[u]; + z = (uint64_t)yw * (uint64_t)s + (uint64_t)xw + (uint64_t)cc; + x[u] = (uint32_t)z & 0x7FFFFFFF; + cc = (uint32_t)(z >> 31); + } + x[len] = cc; +} + +/* + * Normalize a modular integer around 0: if x > p/2, then x is replaced + * with x - p (signed encoding with two's complement); otherwise, x is + * untouched. The two integers x and p are encoded over the same length. + */ +static void +zint_norm_zero(uint32_t *x, const uint32_t *p, size_t len) { + size_t u; + uint32_t r, bb; + + /* + * Compare x with p/2. We use the shifted version of p, and p + * is odd, so we really compare with (p-1)/2; we want to perform + * the subtraction if and only if x > (p-1)/2. + */ + r = 0; + bb = 0; + u = len; + while (u -- > 0) { + uint32_t wx, wp, cc; + + /* + * Get the two words to compare in wx and wp (both over + * 31 bits exactly). + */ + wx = x[u]; + wp = (p[u] >> 1) | (bb << 30); + bb = p[u] & 1; + + /* + * We set cc to -1, 0 or 1, depending on whether wp is + * lower than, equal to, or greater than wx. + */ + cc = wp - wx; + cc = ((-cc) >> 31) | -(cc >> 31); + + /* + * If r != 0 then it is either 1 or -1, and we keep its + * value. Otherwise, if r = 0, then we replace it with cc. + */ + r |= cc & ((r & 1) - 1); + } + + /* + * At this point, r = -1, 0 or 1, depending on whether (p-1)/2 + * is lower than, equal to, or greater than x. We thus want to + * do the subtraction only if r = -1. + */ + zint_sub(x, p, len, r >> 31); +} + +/* + * Rebuild integers from their RNS representation. There are 'num' + * integers, and each consists in 'xlen' words. 
'xx' points at that + * first word of the first integer; subsequent integers are accessed + * by adding 'xstride' repeatedly. + * + * The words of an integer are the RNS representation of that integer, + * using the provided 'primes' are moduli. This function replaces + * each integer with its multi-word value (little-endian order). + * + * If "normalize_signed" is non-zero, then the returned value is + * normalized to the -m/2..m/2 interval (where m is the product of all + * small prime moduli); two's complement is used for negative values. + */ +static void +zint_rebuild_CRT(uint32_t *xx, size_t xlen, size_t xstride, + size_t num, const small_prime *primes, int normalize_signed, + uint32_t *tmp) { + size_t u; + uint32_t *x; + + tmp[0] = primes[0].p; + for (u = 1; u < xlen; u ++) { + /* + * At the entry of each loop iteration: + * - the first u words of each array have been + * reassembled; + * - the first u words of tmp[] contains the + * product of the prime moduli processed so far. + * + * We call 'q' the product of all previous primes. + */ + uint32_t p, p0i, s, R2; + size_t v; + + p = primes[u].p; + s = primes[u].s; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + for (v = 0, x = xx; v < num; v ++, x += xstride) { + uint32_t xp, xq, xr; + /* + * xp = the integer x modulo the prime p for this + * iteration + * xq = (x mod q) mod p + */ + xp = x[u]; + xq = zint_mod_small_unsigned(x, u, p, p0i, R2); + + /* + * New value is (x mod q) + q * (s * (xp - xq) mod p) + */ + xr = modp_montymul(s, modp_sub(xp, xq, p), p, p0i); + zint_add_mul_small(x, tmp, u, xr); + } + + /* + * Update product of primes in tmp[]. + */ + tmp[u] = zint_mul_small(tmp, u, p); + } + + /* + * Normalize the reconstructed values around 0. + */ + if (normalize_signed) { + for (u = 0, x = xx; u < num; u ++, x += xstride) { + zint_norm_zero(x, tmp, xlen); + } + } +} + +/* + * Negate a big integer conditionally: value a is replaced with -a if + * and only if ctl = 1. 
Control value ctl must be 0 or 1. + */ +static void +zint_negate(uint32_t *a, size_t len, uint32_t ctl) { + size_t u; + uint32_t cc, m; + + /* + * If ctl = 1 then we flip the bits of a by XORing with + * 0x7FFFFFFF, and we add 1 to the value. If ctl = 0 then we XOR + * with 0 and add 0, which leaves the value unchanged. + */ + cc = ctl; + m = -ctl >> 1; + for (u = 0; u < len; u ++) { + uint32_t aw; + + aw = a[u]; + aw = (aw ^ m) + cc; + a[u] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Replace a with (a*xa+b*xb)/(2^31) and b with (a*ya+b*yb)/(2^31). + * The low bits are dropped (the caller should compute the coefficients + * such that these dropped bits are all zeros). If either or both + * yields a negative value, then the value is negated. + * + * Returned value is: + * 0 both values were positive + * 1 new a had to be negated + * 2 new b had to be negated + * 3 both new a and new b had to be negated + * + * Coefficients xa, xb, ya and yb may use the full signed 32-bit range. + */ +static uint32_t +zint_co_reduce(uint32_t *a, uint32_t *b, size_t len, + int64_t xa, int64_t xb, int64_t ya, int64_t yb) { + size_t u; + int64_t cca, ccb; + uint32_t nega, negb; + + cca = 0; + ccb = 0; + for (u = 0; u < len; u ++) { + uint32_t wa, wb; + uint64_t za, zb; + + wa = a[u]; + wb = b[u]; + za = wa * (uint64_t)xa + wb * (uint64_t)xb + (uint64_t)cca; + zb = wa * (uint64_t)ya + wb * (uint64_t)yb + (uint64_t)ccb; + if (u > 0) { + a[u - 1] = (uint32_t)za & 0x7FFFFFFF; + b[u - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + cca = *(int64_t *)&za >> 31; + ccb = *(int64_t *)&zb >> 31; + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + nega = (uint32_t)((uint64_t)cca >> 63); + negb = (uint32_t)((uint64_t)ccb >> 63); + zint_negate(a, len, nega); + zint_negate(b, len, negb); + return nega | (negb << 1); +} + +/* + * Finish modular reduction. 
Rules on input parameters: + * + * if neg = 1, then -m <= a < 0 + * if neg = 0, then 0 <= a < 2*m + * + * If neg = 0, then the top word of a[] is allowed to use 32 bits. + * + * Modulus m must be odd. + */ +static void +zint_finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg) { + size_t u; + uint32_t cc, xm, ym; + + /* + * First pass: compare a (assumed nonnegative) with m. Note that + * if the top word uses 32 bits, subtracting m must yield a + * value less than 2^31 since a < 2*m. + */ + cc = 0; + for (u = 0; u < len; u ++) { + cc = (a[u] - m[u] - cc) >> 31; + } + + /* + * If neg = 1 then we must add m (regardless of cc) + * If neg = 0 and cc = 0 then we must subtract m + * If neg = 0 and cc = 1 then we must do nothing + * + * In the loop below, we conditionally subtract either m or -m + * from a. Word xm is a word of m (if neg = 0) or -m (if neg = 1); + * but if neg = 0 and cc = 1, then ym = 0 and it forces mw to 0. + */ + xm = -neg >> 1; + ym = -(neg | (1 - cc)); + cc = neg; + for (u = 0; u < len; u ++) { + uint32_t aw, mw; + + aw = a[u]; + mw = (m[u] ^ xm) & ym; + aw = aw - mw - cc; + a[u] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Replace a with (a*xa+b*xb)/(2^31) mod m, and b with + * (a*ya+b*yb)/(2^31) mod m. Modulus m must be odd; m0i = -1/m[0] mod 2^31. + */ +static void +zint_co_reduce_mod(uint32_t *a, uint32_t *b, const uint32_t *m, size_t len, + uint32_t m0i, int64_t xa, int64_t xb, int64_t ya, int64_t yb) { + size_t u; + int64_t cca, ccb; + uint32_t fa, fb; + + /* + * These are actually four combined Montgomery multiplications. 
+ */ + cca = 0; + ccb = 0; + fa = ((a[0] * (uint32_t)xa + b[0] * (uint32_t)xb) * m0i) & 0x7FFFFFFF; + fb = ((a[0] * (uint32_t)ya + b[0] * (uint32_t)yb) * m0i) & 0x7FFFFFFF; + for (u = 0; u < len; u ++) { + uint32_t wa, wb; + uint64_t za, zb; + + wa = a[u]; + wb = b[u]; + za = wa * (uint64_t)xa + wb * (uint64_t)xb + + m[u] * (uint64_t)fa + (uint64_t)cca; + zb = wa * (uint64_t)ya + wb * (uint64_t)yb + + m[u] * (uint64_t)fb + (uint64_t)ccb; + if (u > 0) { + a[u - 1] = (uint32_t)za & 0x7FFFFFFF; + b[u - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + cca = *(int64_t *)&za >> 31; + ccb = *(int64_t *)&zb >> 31; + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + /* + * At this point: + * -m <= a < 2*m + * -m <= b < 2*m + * (this is a case of Montgomery reduction) + * The top words of 'a' and 'b' may have a 32-th bit set. + * We want to add or subtract the modulus, as required. + */ + zint_finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63)); + zint_finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63)); +} + +/* + * Compute a GCD between two positive big integers x and y. The two + * integers must be odd. Returned value is 1 if the GCD is 1, 0 + * otherwise. When 1 is returned, arrays u and v are filled with values + * such that: + * 0 <= u <= y + * 0 <= v <= x + * x*u - y*v = 1 + * x[] and y[] are unmodified. Both input values must have the same + * encoded length. Temporary array must be large enough to accommodate 4 + * extra values of that length. Arrays u, v and tmp may not overlap with + * each other, or with either x or y. + */ +static int +zint_bezout(uint32_t *u, uint32_t *v, + const uint32_t *x, const uint32_t *y, + size_t len, uint32_t *tmp) { + /* + * Algorithm is an extended binary GCD. 
We maintain 6 values + * a, b, u0, u1, v0 and v1 with the following invariants: + * + * a = x*u0 - y*v0 + * b = x*u1 - y*v1 + * 0 <= a <= x + * 0 <= b <= y + * 0 <= u0 < y + * 0 <= v0 < x + * 0 <= u1 <= y + * 0 <= v1 < x + * + * Initial values are: + * + * a = x u0 = 1 v0 = 0 + * b = y u1 = y v1 = x-1 + * + * Each iteration reduces either a or b, and maintains the + * invariants. Algorithm stops when a = b, at which point their + * common value is GCD(a,b) and (u0,v0) (or (u1,v1)) contains + * the values (u,v) we want to return. + * + * The formal definition of the algorithm is a sequence of steps: + * + * - If a is even, then: + * a <- a/2 + * u0 <- u0/2 mod y + * v0 <- v0/2 mod x + * + * - Otherwise, if b is even, then: + * b <- b/2 + * u1 <- u1/2 mod y + * v1 <- v1/2 mod x + * + * - Otherwise, if a > b, then: + * a <- (a-b)/2 + * u0 <- (u0-u1)/2 mod y + * v0 <- (v0-v1)/2 mod x + * + * - Otherwise: + * b <- (b-a)/2 + * u1 <- (u1-u0)/2 mod y + * v1 <- (v1-v0)/2 mod y + * + * We can show that the operations above preserve the invariants: + * + * - If a is even, then u0 and v0 are either both even or both + * odd (since a = x*u0 - y*v0, and x and y are both odd). + * If u0 and v0 are both even, then (u0,v0) <- (u0/2,v0/2). + * Otherwise, (u0,v0) <- ((u0+y)/2,(v0+x)/2). Either way, + * the a = x*u0 - y*v0 invariant is preserved. + * + * - The same holds for the case where b is even. + * + * - If a and b are odd, and a > b, then: + * + * a-b = x*(u0-u1) - y*(v0-v1) + * + * In that situation, if u0 < u1, then x*(u0-u1) < 0, but + * a-b > 0; therefore, it must be that v0 < v1, and the + * first part of the update is: (u0,v0) <- (u0-u1+y,v0-v1+x), + * which preserves the invariants. Otherwise, if u0 > u1, + * then u0-u1 >= 1, thus x*(u0-u1) >= x. But a <= x and + * b >= 0, hence a-b <= x. It follows that, in that case, + * v0-v1 >= 0. The first part of the update is then: + * (u0,v0) <- (u0-u1,v0-v1), which again preserves the + * invariants. 
+ * + * Either way, once the subtraction is done, the new value of + * a, which is the difference of two odd values, is even, + * and the remaining of this step is a subcase of the + * first algorithm case (i.e. when a is even). + * + * - If a and b are odd, and b > a, then the a similar + * argument holds. + * + * The values a and b start at x and y, respectively. Since x + * and y are odd, their GCD is odd, and it is easily seen that + * all steps conserve the GCD (GCD(a-b,b) = GCD(a, b); + * GCD(a/2,b) = GCD(a,b) if GCD(a,b) is odd). Moreover, either a + * or b is reduced by at least one bit at each iteration, so + * the algorithm necessarily converges on the case a = b, at + * which point the common value is the GCD. + * + * In the algorithm expressed above, when a = b, the fourth case + * applies, and sets b = 0. Since a contains the GCD of x and y, + * which are both odd, a must be odd, and subsequent iterations + * (if any) will simply divide b by 2 repeatedly, which has no + * consequence. Thus, the algorithm can run for more iterations + * than necessary; the final GCD will be in a, and the (u,v) + * coefficients will be (u0,v0). + * + * + * The presentation above is bit-by-bit. It can be sped up by + * noticing that all decisions are taken based on the low bits + * and high bits of a and b. We can extract the two top words + * and low word of each of a and b, and compute reduction + * parameters pa, pb, qa and qb such that the new values for + * a and b are: + * a' = (a*pa + b*pb) / (2^31) + * b' = (a*qa + b*qb) / (2^31) + * the two divisions being exact. The coefficients are obtained + * just from the extracted words, and may be slightly off, requiring + * an optional correction: if a' < 0, then we replace pa with -pa + * and pb with -pb. Each such step will reduce the total length + * (sum of lengths of a and b) by at least 30 bits at each + * iteration. 
+ */ + uint32_t *u0, *u1, *v0, *v1, *a, *b; + uint32_t x0i, y0i; + uint32_t num, rc; + size_t j; + + if (len == 0) { + return 0; + } + + /* + * u0 and v0 are the u and v result buffers; the four other + * values (u1, v1, a and b) are taken from tmp[]. + */ + u0 = u; + v0 = v; + u1 = tmp; + v1 = u1 + len; + a = v1 + len; + b = a + len; + + /* + * We'll need the Montgomery reduction coefficients. + */ + x0i = modp_ninv31(x[0]); + y0i = modp_ninv31(y[0]); + + /* + * Initialize a, b, u0, u1, v0 and v1. + * a = x u0 = 1 v0 = 0 + * b = y u1 = y v1 = x-1 + * Note that x is odd, so computing x-1 is easy. + */ + memcpy(a, x, len * sizeof * x); + memcpy(b, y, len * sizeof * y); + u0[0] = 1; + memset(u0 + 1, 0, (len - 1) * sizeof * u0); + memset(v0, 0, len * sizeof * v0); + memcpy(u1, y, len * sizeof * u1); + memcpy(v1, x, len * sizeof * v1); + v1[0] --; + + /* + * Each input operand may be as large as 31*len bits, and we + * reduce the total length by at least 30 bits at each iteration. + */ + for (num = 62 * (uint32_t)len + 30; num >= 30; num -= 30) { + uint32_t c0, c1; + uint32_t a0, a1, b0, b1; + uint64_t a_hi, b_hi; + uint32_t a_lo, b_lo; + int64_t pa, pb, qa, qb; + int i; + uint32_t r; + + /* + * Extract the top words of a and b. If j is the highest + * index >= 1 such that a[j] != 0 or b[j] != 0, then we + * want (a[j] << 31) + a[j-1] and (b[j] << 31) + b[j-1]. + * If a and b are down to one word each, then we use + * a[0] and b[0]. + */ + c0 = (uint32_t) -1; + c1 = (uint32_t) -1; + a0 = 0; + a1 = 0; + b0 = 0; + b1 = 0; + j = len; + while (j -- > 0) { + uint32_t aw, bw; + + aw = a[j]; + bw = b[j]; + a0 ^= (a0 ^ aw) & c0; + a1 ^= (a1 ^ aw) & c1; + b0 ^= (b0 ^ bw) & c0; + b1 ^= (b1 ^ bw) & c1; + c1 = c0; + c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1; + } + + /* + * If c1 = 0, then we grabbed two words for a and b. + * If c1 != 0 but c0 = 0, then we grabbed one word. 
It + * is not possible that c1 != 0 and c0 != 0, because that + * would mean that both integers are zero. + */ + a1 |= a0 & c1; + a0 &= ~c1; + b1 |= b0 & c1; + b0 &= ~c1; + a_hi = ((uint64_t)a0 << 31) + a1; + b_hi = ((uint64_t)b0 << 31) + b1; + a_lo = a[0]; + b_lo = b[0]; + + /* + * Compute reduction factors: + * + * a' = a*pa + b*pb + * b' = a*qa + b*qb + * + * such that a' and b' are both multiple of 2^31, but are + * only marginally larger than a and b. + */ + pa = 1; + pb = 0; + qa = 0; + qb = 1; + for (i = 0; i < 31; i ++) { + /* + * At each iteration: + * + * a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi + * b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi + * a <- a/2 if: a is even + * b <- b/2 if: a is odd, b is even + * + * We multiply a_lo and b_lo by 2 at each + * iteration, thus a division by 2 really is a + * non-multiplication by 2. + */ + uint32_t rt, oa, ob, cAB, cBA, cA; + uint64_t rz; + + /* + * rt = 1 if a_hi > b_hi, 0 otherwise. + */ + rz = b_hi - a_hi; + rt = (uint32_t)((rz ^ ((a_hi ^ b_hi) + & (a_hi ^ rz))) >> 63); + + /* + * cAB = 1 if b must be subtracted from a + * cBA = 1 if a must be subtracted from b + * cA = 1 if a must be divided by 2 + * + * Rules: + * + * cAB and cBA cannot both be 1. + * If a is not divided by 2, b is. + */ + oa = (a_lo >> i) & 1; + ob = (b_lo >> i) & 1; + cAB = oa & ob & rt; + cBA = oa & ob & ~rt; + cA = cAB | (oa ^ 1); + + /* + * Conditional subtractions. + */ + a_lo -= b_lo & -cAB; + a_hi -= b_hi & -(uint64_t)cAB; + pa -= qa & -(int64_t)cAB; + pb -= qb & -(int64_t)cAB; + b_lo -= a_lo & -cBA; + b_hi -= a_hi & -(uint64_t)cBA; + qa -= pa & -(int64_t)cBA; + qb -= pb & -(int64_t)cBA; + + /* + * Shifting. 
+ */ + a_lo += a_lo & (cA - 1); + pa += pa & ((int64_t)cA - 1); + pb += pb & ((int64_t)cA - 1); + a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA; + b_lo += b_lo & -cA; + qa += qa & -(int64_t)cA; + qb += qb & -(int64_t)cA; + b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1); + } + + /* + * Apply the computed parameters to our values. We + * may have to correct pa and pb depending on the + * returned value of zint_co_reduce() (when a and/or b + * had to be negated). + */ + r = zint_co_reduce(a, b, len, pa, pb, qa, qb); + pa -= (pa + pa) & -(int64_t)(r & 1); + pb -= (pb + pb) & -(int64_t)(r & 1); + qa -= (qa + qa) & -(int64_t)(r >> 1); + qb -= (qb + qb) & -(int64_t)(r >> 1); + zint_co_reduce_mod(u0, u1, y, len, y0i, pa, pb, qa, qb); + zint_co_reduce_mod(v0, v1, x, len, x0i, pa, pb, qa, qb); + } + + /* + * At that point, array a[] should contain the GCD, and the + * results (u,v) should already be set. We check that the GCD + * is indeed 1. We also check that the two operands x and y + * are odd. + */ + rc = a[0] ^ 1; + for (j = 1; j < len; j ++) { + rc |= a[j]; + } + return (int)((1 - ((rc | -rc) >> 31)) & x[0] & y[0]); +} + +/* + * Add k*y*2^sc to x. The result is assumed to fit in the array of + * size xlen (truncation is applied if necessary). + * Scale factor 'sc' is provided as sch and scl, such that: + * sch = sc / 31 + * scl = sc % 31 + * xlen MUST NOT be lower than ylen. + * + * x[] and y[] are both signed integers, using two's complement for + * negative values. + */ +static void +zint_add_scaled_mul_small(uint32_t *x, size_t xlen, + const uint32_t *y, size_t ylen, int32_t k, + uint32_t sch, uint32_t scl) { + size_t u; + uint32_t ysign, tw; + int32_t cc; + + if (ylen == 0) { + return; + } + + ysign = -(y[ylen - 1] >> 30) >> 1; + tw = 0; + cc = 0; + for (u = sch; u < xlen; u ++) { + size_t v; + uint32_t wy, wys, ccu; + uint64_t z; + + /* + * Get the next word of y (scaled). 
+ */ + v = u - sch; + if (v < ylen) { + wy = y[v]; + } else { + wy = ysign; + } + wys = ((wy << scl) & 0x7FFFFFFF) | tw; + tw = wy >> (31 - scl); + + /* + * The expression below does not overflow. + */ + z = (uint64_t)((int64_t)wys * (int64_t)k + (int64_t)x[u] + cc); + x[u] = (uint32_t)z & 0x7FFFFFFF; + + /* + * Right-shifting the signed value z would yield + * implementation-defined results (arithmetic shift is + * not guaranteed). However, we can cast to unsigned, + * and get the next carry as an unsigned word. We can + * then convert it back to signed by using the guaranteed + * fact that 'int32_t' uses two's complement with no + * trap representation or padding bit, and with a layout + * compatible with that of 'uint32_t'. + */ + ccu = (uint32_t)(z >> 31); + cc = *(int32_t *)&ccu; + } +} + +/* + * Subtract y*2^sc from x. The result is assumed to fit in the array of + * size xlen (truncation is applied if necessary). + * Scale factor 'sc' is provided as sch and scl, such that: + * sch = sc / 31 + * scl = sc % 31 + * xlen MUST NOT be lower than ylen. + * + * x[] and y[] are both signed integers, using two's complement for + * negative values. + */ +static void +zint_sub_scaled(uint32_t *x, size_t xlen, + const uint32_t *y, size_t ylen, uint32_t sch, uint32_t scl) { + size_t u; + uint32_t ysign, tw; + uint32_t cc; + + if (ylen == 0) { + return; + } + + ysign = -(y[ylen - 1] >> 30) >> 1; + tw = 0; + cc = 0; + for (u = sch; u < xlen; u ++) { + size_t v; + uint32_t w, wy, wys; + + /* + * Get the next word of y (scaled). + */ + v = u - sch; + if (v < ylen) { + wy = y[v]; + } else { + wy = ysign; + } + wys = ((wy << scl) & 0x7FFFFFFF) | tw; + tw = wy >> (31 - scl); + + w = x[u] - wys - cc; + x[u] = w & 0x7FFFFFFF; + cc = w >> 31; + } +} + +/* + * Convert a one-word signed big integer into a signed value. 
+ */ +static inline int32_t +zint_one_to_plain(const uint32_t *x) { + uint32_t w; + + w = x[0]; + w |= (w & 0x40000000) << 1; + return *(int32_t *)&w; +} + +/* ==================================================================== */ + +/* + * Convert a polynomial to floating-point values. + * + * Each coefficient has length flen words, and starts fstride words after + * the previous. + * + * IEEE-754 binary64 values can represent values in a finite range, + * roughly 2^(-1023) to 2^(+1023); thus, if coefficients are too large, + * they should be "trimmed" by pointing not to the lowest word of each, + * but upper. + */ +static void +poly_big_to_fp(fpr *d, const uint32_t *f, size_t flen, size_t fstride, + unsigned logn) { + size_t n, u; + + n = MKN(logn); + if (flen == 0) { + for (u = 0; u < n; u ++) { + d[u] = fpr_zero; + } + return; + } + for (u = 0; u < n; u ++, f += fstride) { + size_t v; + uint32_t neg, cc, xm; + fpr x, fsc; + + /* + * Get sign of the integer; if it is negative, then we + * will load its absolute value instead, and negate the + * result. + */ + neg = -(f[flen - 1] >> 30); + xm = neg >> 1; + cc = neg & 1; + x = fpr_zero; + fsc = fpr_one; + for (v = 0; v < flen; v ++, fsc = fpr_mul(fsc, fpr_ptwo31)) { + uint32_t w; + + w = (f[v] ^ xm) + cc; + cc = w >> 31; + w &= 0x7FFFFFFF; + w -= (w << 1) & neg; + x = fpr_add(x, fpr_mul(fpr_of(*(int32_t *)&w), fsc)); + } + d[u] = x; + } +} + +/* + * Convert a polynomial to small integers. Source values are supposed + * to be one-word integers, signed over 31 bits. Returned value is 0 + * if any of the coefficients exceeds the provided limit (in absolute + * value), or 1 on success. + * + * This is not constant-time; this is not a problem here, because on + * any failure, the NTRU-solving process will be deemed to have failed + * and the (f,g) polynomials will be discarded. 
+ */ +static int +poly_big_to_small(int8_t *d, const uint32_t *s, int lim, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + int32_t z; + + z = zint_one_to_plain(s + u); + if (z < -lim || z > lim) { + return 0; + } + d[u] = (int8_t)z; + } + return 1; +} + +/* + * Subtract k*f from F, where F, f and k are polynomials modulo X^N+1. + * Coefficients of polynomial k are small integers (signed values in the + * -2^31..2^31 range) scaled by 2^sc. Value sc is provided as sch = sc / 31 + * and scl = sc % 31. + * + * This function implements the basic quadratic multiplication algorithm, + * which is efficient in space (no extra buffer needed) but slow at + * high degree. + */ +static void +poly_sub_scaled(uint32_t *F, size_t Flen, size_t Fstride, + const uint32_t *f, size_t flen, size_t fstride, + const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + int32_t kf; + size_t v; + uint32_t *x; + const uint32_t *y; + + kf = -k[u]; + x = F + u * Fstride; + y = f; + for (v = 0; v < n; v ++) { + zint_add_scaled_mul_small( + x, Flen, y, flen, kf, sch, scl); + if (u + v == n - 1) { + x = F; + kf = -kf; + } else { + x += Fstride; + } + y += fstride; + } + } +} + +/* + * Subtract k*f from F. Coefficients of polynomial k are small integers + * (signed values in the -2^31..2^31 range) scaled by 2^sc. This function + * assumes that the degree is large, and integers relatively small. + * The value sc is provided as sch = sc / 31 and scl = sc % 31. 
+ */ +static void +poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride, + const uint32_t *f, size_t flen, size_t fstride, + const int32_t *k, uint32_t sch, uint32_t scl, unsigned logn, + uint32_t *tmp) { + uint32_t *gm, *igm, *fk, *t1, *x; + const uint32_t *y; + size_t n, u, tlen; + const small_prime *primes; + + n = MKN(logn); + tlen = flen + 1; + gm = tmp; + igm = gm + MKN(logn); + fk = igm + MKN(logn); + t1 = fk + n * tlen; + + primes = PRIMES; + + /* + * Compute k*f in fk[], in RNS notation. + */ + for (u = 0; u < tlen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)flen, p, p0i, R2); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + for (v = 0; v < n; v ++) { + t1[v] = modp_set(k[v], p); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, y = f, x = fk + u; + v < n; v ++, y += fstride, x += tlen) { + *x = zint_mod_small_signed(y, flen, p, p0i, R2, Rx); + } + modp_NTT2_ext(fk + u, tlen, gm, logn, p, p0i); + for (v = 0, x = fk + u; v < n; v ++, x += tlen) { + *x = modp_montymul( + modp_montymul(t1[v], *x, p, p0i), R2, p, p0i); + } + modp_iNTT2_ext(fk + u, tlen, igm, logn, p, p0i); + } + + /* + * Rebuild k*f. + */ + zint_rebuild_CRT(fk, tlen, tlen, n, primes, 1, t1); + + /* + * Subtract k*f, scaled, from F. + */ + for (u = 0, x = F, y = fk; u < n; u ++, x += Fstride, y += tlen) { + zint_sub_scaled(x, Flen, y, tlen, sch, scl); + } +} + +/* ==================================================================== */ + + +#define RNG_CONTEXT inner_shake256_context + +/* + * Get a random 8-byte integer from a SHAKE-based RNG. This function + * ensures consistent interpretation of the SHAKE output so that + * the same values will be obtained over different platforms, in case + * a known seed is used. + */ +static inline uint64_t +get_rng_u64(inner_shake256_context *rng) { + /* + * We enforce little-endian representation. 
+ */ + + uint8_t tmp[8]; + + inner_shake256_extract(rng, tmp, sizeof tmp); + return (uint64_t)tmp[0] + | ((uint64_t)tmp[1] << 8) + | ((uint64_t)tmp[2] << 16) + | ((uint64_t)tmp[3] << 24) + | ((uint64_t)tmp[4] << 32) + | ((uint64_t)tmp[5] << 40) + | ((uint64_t)tmp[6] << 48) + | ((uint64_t)tmp[7] << 56); +} + +/* + * Table below incarnates a discrete Gaussian distribution: + * D(x) = exp(-(x^2)/(2*sigma^2)) + * where sigma = 1.17*sqrt(q/(2*N)), q = 12289, and N = 1024. + * Element 0 of the table is P(x = 0). + * For k > 0, element k is P(x >= k+1 | x > 0). + * Probabilities are scaled up by 2^63. + */ +static const uint64_t gauss_1024_12289[] = { + 1283868770400643928u, 6416574995475331444u, 4078260278032692663u, + 2353523259288686585u, 1227179971273316331u, 575931623374121527u, + 242543240509105209u, 91437049221049666u, 30799446349977173u, + 9255276791179340u, 2478152334826140u, 590642893610164u, + 125206034929641u, 23590435911403u, 3948334035941u, + 586753615614u, 77391054539u, 9056793210u, + 940121950u, 86539696u, 7062824u, + 510971u, 32764u, 1862u, + 94u, 4u, 0u +}; + +/* + * Generate a random value with a Gaussian distribution centered on 0. + * The RNG must be ready for extraction (already flipped). + * + * Distribution has standard deviation 1.17*sqrt(q/(2*N)). The + * precomputed table is for N = 1024. Since the sum of two independent + * values of standard deviation sigma has standard deviation + * sigma*sqrt(2), then we can just generate more values and add them + * together for lower dimensions. + */ +static int +mkgauss(RNG_CONTEXT *rng, unsigned logn) { + unsigned u, g; + int val; + + g = 1U << (10 - logn); + val = 0; + for (u = 0; u < g; u ++) { + /* + * Each iteration generates one value with the + * Gaussian distribution for N = 1024. + * + * We use two random 64-bit values. First value + * decides on whether the generated value is 0, and, + * if not, the sign of the value. Second random 64-bit + * word is used to generate the non-zero value. 
+ * + * For constant-time code we have to read the complete + * table. This has negligible cost, compared with the + * remainder of the keygen process (solving the NTRU + * equation). + */ + uint64_t r; + uint32_t f, v, k, neg; + + /* + * First value: + * - flag 'neg' is randomly selected to be 0 or 1. + * - flag 'f' is set to 1 if the generated value is zero, + * or set to 0 otherwise. + */ + r = get_rng_u64(rng); + neg = (uint32_t)(r >> 63); + r &= ~((uint64_t)1 << 63); + f = (uint32_t)((r - gauss_1024_12289[0]) >> 63); + + /* + * We produce a new random 63-bit integer r, and go over + * the array, starting at index 1. We store in v the + * index of the first array element which is not greater + * than r, unless the flag f was already 1. + */ + v = 0; + r = get_rng_u64(rng); + r &= ~((uint64_t)1 << 63); + for (k = 1; k < (uint32_t)((sizeof gauss_1024_12289) + / (sizeof gauss_1024_12289[0])); k ++) { + uint32_t t; + + t = (uint32_t)((r - gauss_1024_12289[k]) >> 63) ^ 1; + v |= k & -(t & (f ^ 1)); + f |= t; + } + + /* + * We apply the sign ('neg' flag). If the value is zero, + * the sign has no effect. + */ + v = (v ^ -neg) + neg; + + /* + * Generated value is added to val. + */ + val += *(int32_t *)&v; + } + return val; +} + +/* + * The MAX_BL_SMALL[] and MAX_BL_LARGE[] contain the lengths, in 31-bit + * words, of intermediate values in the computation: + * + * MAX_BL_SMALL[depth]: length for the input f and g at that depth + * MAX_BL_LARGE[depth]: length for the unreduced F and G at that depth + * + * Rules: + * + * - Within an array, values grow. + * + * - The 'SMALL' array must have an entry for maximum depth, corresponding + * to the size of values used in the binary GCD. There is no such value + * for the 'LARGE' array (the binary GCD yields already reduced + * coefficients). + * + * - MAX_BL_LARGE[depth] >= MAX_BL_SMALL[depth + 1]. + * + * - Values must be large enough to handle the common cases, with some + * margins. 
+ * + * - Values must not be "too large" either because we will convert some + * integers into floating-point values by considering the top 10 words, + * i.e. 310 bits; hence, for values of length more than 10 words, we + * should take care to have the length centered on the expected size. + * + * The following average lengths, in bits, have been measured on thousands + * of random keys (fg = max length of the absolute value of coefficients + * of f and g at that depth; FG = idem for the unreduced F and G; for the + * maximum depth, F and G are the output of binary GCD, multiplied by q; + * for each value, the average and standard deviation are provided). + * + * Binary case: + * depth: 10 fg: 6307.52 (24.48) FG: 6319.66 (24.51) + * depth: 9 fg: 3138.35 (12.25) FG: 9403.29 (27.55) + * depth: 8 fg: 1576.87 ( 7.49) FG: 4703.30 (14.77) + * depth: 7 fg: 794.17 ( 4.98) FG: 2361.84 ( 9.31) + * depth: 6 fg: 400.67 ( 3.10) FG: 1188.68 ( 6.04) + * depth: 5 fg: 202.22 ( 1.87) FG: 599.81 ( 3.87) + * depth: 4 fg: 101.62 ( 1.02) FG: 303.49 ( 2.38) + * depth: 3 fg: 50.37 ( 0.53) FG: 153.65 ( 1.39) + * depth: 2 fg: 24.07 ( 0.25) FG: 78.20 ( 0.73) + * depth: 1 fg: 10.99 ( 0.08) FG: 39.82 ( 0.41) + * depth: 0 fg: 4.00 ( 0.00) FG: 19.61 ( 0.49) + * + * Integers are actually represented either in binary notation over + * 31-bit words (signed, using two's complement), or in RNS, modulo + * many small primes. These small primes are close to, but slightly + * lower than, 2^31. Use of RNS loses less than two bits, even for + * the largest values. + * + * IMPORTANT: if these values are modified, then the temporary buffer + * sizes (FALCON_KEYGEN_TEMP_*, in inner.h) must be recomputed + * accordingly. 
+ */ + +static const size_t MAX_BL_SMALL[] = { + 1, 1, 2, 2, 4, 7, 14, 27, 53, 106, 209 +}; + +static const size_t MAX_BL_LARGE[] = { + 2, 2, 5, 7, 12, 21, 40, 78, 157, 308 +}; + +/* + * Average and standard deviation for the maximum size (in bits) of + * coefficients of (f,g), depending on depth. These values are used + * to compute bounds for Babai's reduction. + */ +static const struct { + int avg; + int std; +} BITLENGTH[] = { + { 4, 0 }, + { 11, 1 }, + { 24, 1 }, + { 50, 1 }, + { 102, 1 }, + { 202, 2 }, + { 401, 4 }, + { 794, 5 }, + { 1577, 8 }, + { 3138, 13 }, + { 6308, 25 } +}; + +/* + * Minimal recursion depth at which we rebuild intermediate values + * when reconstructing f and g. + */ +#define DEPTH_INT_FG 4 + +/* + * Compute squared norm of a short vector. Returned value is saturated to + * 2^32-1 if it is not lower than 2^31. + */ +static uint32_t +poly_small_sqnorm(const int8_t *f, unsigned logn) { + size_t n, u; + uint32_t s, ng; + + n = MKN(logn); + s = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = f[u]; + s += (uint32_t)(z * z); + ng |= s; + } + return s | -(ng >> 31); +} + +/* + * Align (upwards) the provided 'data' pointer with regards to 'base' + * so that the offset is a multiple of the size of 'fpr'. + */ +static fpr * +align_fpr(void *base, void *data) { + uint8_t *cb, *cd; + size_t k, km; + + cb = base; + cd = data; + k = (size_t)(cd - cb); + km = k % sizeof(fpr); + if (km) { + k += (sizeof(fpr)) - km; + } + return (fpr *)(cb + k); +} + +/* + * Align (upwards) the provided 'data' pointer with regards to 'base' + * so that the offset is a multiple of the size of 'uint32_t'. + */ +static uint32_t * +align_u32(void *base, void *data) { + uint8_t *cb, *cd; + size_t k, km; + + cb = base; + cd = data; + k = (size_t)(cd - cb); + km = k % sizeof(uint32_t); + if (km) { + k += (sizeof(uint32_t)) - km; + } + return (uint32_t *)(cb + k); +} + +/* + * Convert a small vector to floating point. 
+ */ +static void +poly_small_to_fp(fpr *x, const int8_t *f, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + x[u] = fpr_of(f[u]); + } +} + +/* + * Input: f,g of degree N = 2^logn; 'depth' is used only to get their + * individual length. + * + * Output: f',g' of degree N/2, with the length for 'depth+1'. + * + * Values are in RNS; input and/or output may also be in NTT. + */ +static void +make_fg_step(uint32_t *data, unsigned logn, unsigned depth, + int in_ntt, int out_ntt) { + size_t n, hn, u; + size_t slen, tlen; + uint32_t *fd, *gd, *fs, *gs, *gm, *igm, *t1; + const small_prime *primes; + + n = (size_t)1 << logn; + hn = n >> 1; + slen = MAX_BL_SMALL[depth]; + tlen = MAX_BL_SMALL[depth + 1]; + primes = PRIMES; + + /* + * Prepare room for the result. + */ + fd = data; + gd = fd + hn * tlen; + fs = gd + hn * tlen; + gs = fs + n * slen; + gm = gs + n * slen; + igm = gm + n; + t1 = igm + n; + memmove(fs, data, 2 * n * slen * sizeof * data); + + /* + * First slen words: we use the input values directly, and apply + * inverse NTT as we go. 
+ */ + for (u = 0; u < slen; u ++) { + uint32_t p, p0i, R2; + size_t v; + uint32_t *x; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + for (v = 0, x = fs + u; v < n; v ++, x += slen) { + t1[v] = *x; + } + if (!in_ntt) { + modp_NTT2(t1, gm, logn, p, p0i); + } + for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + if (in_ntt) { + modp_iNTT2_ext(fs + u, slen, igm, logn, p, p0i); + } + + for (v = 0, x = gs + u; v < n; v ++, x += slen) { + t1[v] = *x; + } + if (!in_ntt) { + modp_NTT2(t1, gm, logn, p, p0i); + } + for (v = 0, x = gd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + if (in_ntt) { + modp_iNTT2_ext(gs + u, slen, igm, logn, p, p0i); + } + + if (!out_ntt) { + modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i); + modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i); + } + } + + /* + * Since the fs and gs words have been de-NTTized, we can use the + * CRT to rebuild the values. + */ + zint_rebuild_CRT(fs, slen, slen, n, primes, 1, gm); + zint_rebuild_CRT(gs, slen, slen, n, primes, 1, gm); + + /* + * Remaining words: use modular reductions to extract the values. 
+ */ + for (u = slen; u < tlen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *x; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)slen, p, p0i, R2); + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + for (v = 0, x = fs; v < n; v ++, x += slen) { + t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + for (v = 0, x = gs; v < n; v ++, x += slen) { + t1[v] = zint_mod_small_signed(x, slen, p, p0i, R2, Rx); + } + modp_NTT2(t1, gm, logn, p, p0i); + for (v = 0, x = gd + u; v < hn; v ++, x += tlen) { + uint32_t w0, w1; + + w0 = t1[(v << 1) + 0]; + w1 = t1[(v << 1) + 1]; + *x = modp_montymul( + modp_montymul(w0, w1, p, p0i), R2, p, p0i); + } + + if (!out_ntt) { + modp_iNTT2_ext(fd + u, tlen, igm, logn - 1, p, p0i); + modp_iNTT2_ext(gd + u, tlen, igm, logn - 1, p, p0i); + } + } +} + +/* + * Compute f and g at a specific depth, in RNS notation. + * + * Returned values are stored in the data[] array, at slen words per integer. + * + * Conditions: + * 0 <= depth <= logn + * + * Space use in data[]: enough room for any two successive values (f', g', + * f and g). 
+ */ +static void +make_fg(uint32_t *data, const int8_t *f, const int8_t *g, + unsigned logn, unsigned depth, int out_ntt) { + size_t n, u; + uint32_t *ft, *gt, p0; + unsigned d; + const small_prime *primes; + + n = MKN(logn); + ft = data; + gt = ft + n; + primes = PRIMES; + p0 = primes[0].p; + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p0); + gt[u] = modp_set(g[u], p0); + } + + if (depth == 0 && out_ntt) { + uint32_t *gm, *igm; + uint32_t p, p0i; + + p = primes[0].p; + p0i = modp_ninv31(p); + gm = gt + n; + igm = gm + MKN(logn); + modp_mkgm2(gm, igm, logn, primes[0].g, p, p0i); + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + return; + } + + if (depth == 0) { + return; + } + if (depth == 1) { + make_fg_step(data, logn, 0, 0, out_ntt); + return; + } + make_fg_step(data, logn, 0, 0, 1); + for (d = 1; d + 1 < depth; d ++) { + make_fg_step(data, logn - d, d, 1, 1); + } + make_fg_step(data, logn - depth + 1, depth - 1, 1, out_ntt); +} + +/* + * Solving the NTRU equation, deepest level: compute the resultants of + * f and g with X^N+1, and use binary GCD. The F and G values are + * returned in tmp[]. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_deepest(unsigned logn_top, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t len; + uint32_t *Fp, *Gp, *fp, *gp, *t1, q; + const small_prime *primes; + + len = MAX_BL_SMALL[logn_top]; + primes = PRIMES; + + Fp = tmp; + Gp = Fp + len; + fp = Gp + len; + gp = fp + len; + t1 = gp + len; + + make_fg(fp, f, g, logn_top, logn_top, 0); + + /* + * We use the CRT to rebuild the resultants as big integers. + * There are two such big integers. The resultants are always + * nonnegative. + */ + zint_rebuild_CRT(fp, len, len, 2, primes, 0, t1); + + /* + * Apply the binary GCD. The zint_bezout() function works only + * if both inputs are odd. 
+ * + * We can test on the result and return 0 because that would + * imply failure of the NTRU solving equation, and the (f,g) + * values will be abandoned in that case. + */ + if (!zint_bezout(Gp, Fp, fp, gp, len, t1)) { + return 0; + } + + /* + * Multiply the two values by the target value q. Values must + * fit in the destination arrays. + * We can again test on the returned words: a non-zero output + * of zint_mul_small() means that we exceeded our array + * capacity, and that implies failure and rejection of (f,g). + */ + q = 12289; + if (zint_mul_small(Fp, len, q) != 0 + || zint_mul_small(Gp, len, q) != 0) { + return 0; + } + + return 1; +} + +/* + * Solving the NTRU equation, intermediate level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * This function MAY be invoked for the top-level (in which case depth = 0). + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_intermediate(unsigned logn_top, + const int8_t *f, const int8_t *g, unsigned depth, uint32_t *tmp) { + /* + * In this function, 'logn' is the log2 of the degree for + * this step. If N = 2^logn, then: + * - the F and G values already in tmp[] (from the deeper + * levels) have degree N/2; + * - this function should return F and G of degree N.
+ */ + unsigned logn; + size_t n, hn, slen, dlen, llen, rlen, FGlen, u; + uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1; + fpr *rt1, *rt2, *rt3, *rt4, *rt5; + int scale_fg, minbl_fg, maxbl_fg, maxbl_FG, scale_k; + uint32_t *x, *y; + int32_t *k; + const small_prime *primes; + + logn = logn_top - depth; + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * slen = size for our input f and g; also size of the reduced + * F and G we return (degree N) + * + * dlen = size of the F and G obtained from the deeper level + * (degree N/2) + * + * llen = size for intermediary F and G before reduction (degree N) + * + * We build our non-reduced F and G as two independent halves each, + * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1). + */ + slen = MAX_BL_SMALL[depth]; + dlen = MAX_BL_SMALL[depth + 1]; + llen = MAX_BL_LARGE[depth]; + primes = PRIMES; + + /* + * Fd and Gd are the F and G from the deeper level. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + + /* + * Compute the input f and g for this level. Note that we get f + * and g in RNS + NTT representation. + */ + ft = Gd + dlen * hn; + make_fg(ft, f, g, logn_top, depth, 1); + + /* + * Move the newly computed f and g to make room for our candidate + * F and G (unreduced). + */ + Ft = tmp; + Gt = Ft + n * llen; + t1 = Gt + n * llen; + memmove(t1, ft, 2 * n * slen * sizeof * ft); + ft = t1; + gt = ft + slen * n; + t1 = gt + slen * n; + + /* + * Move Fd and Gd _after_ f and g. + */ + memmove(t1, Fd, 2 * hn * dlen * sizeof * Fd); + Fd = t1; + Gd = Fd + hn * dlen; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt (only n/2 values in each).
+ */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * We do not need Fd and Gd after that point. + */ + + /* + * Compute our F and G modulo sufficiently many small primes. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + size_t v; + + /* + * All computations are done modulo p. + */ + p = primes[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * If we processed slen words, then f and g have been + * de-NTTized, and are in RNS; we can rebuild them. + */ + if (u == slen) { + zint_rebuild_CRT(ft, slen, slen, n, primes, 1, t1); + zint_rebuild_CRT(gt, slen, slen, n, primes, 1, t1); + } + + gm = t1; + igm = gm + n; + fx = igm + n; + gx = fx + n; + + modp_mkgm2(gm, igm, logn, primes[u].g, p, p0i); + + if (u < slen) { + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + fx[v] = *x; + gx[v] = *y; + } + modp_iNTT2_ext(ft + u, slen, igm, logn, p, p0i); + modp_iNTT2_ext(gt + u, slen, igm, logn, p, p0i); + } else { + uint32_t Rx; + + Rx = modp_Rx((unsigned)slen, p, p0i, R2); + for (v = 0, x = ft, y = gt; + v < n; v ++, x += slen, y += slen) { + fx[v] = zint_mod_small_signed(x, slen, + p, p0i, R2, Rx); + gx[v] = zint_mod_small_signed(y, slen, + p, p0i, R2, Rx); + } + modp_NTT2(fx, gm, logn, p, p0i); + modp_NTT2(gx, gm, logn, p, p0i); + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed in + * a previous step, and stored in Ft and Gt. 
+ */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * General case: + * + * we divide degree by d = 2 or 3 + * f'(x^d) = N(f)(x^d) = f * adj(f) + * g'(x^d) = N(g)(x^d) = g * adj(g) + * f'*G' - g'*F' = q + * F = F'(x^d) * adj(g) + * G = G'(x^d) * adj(f) + * + * We compute things in the NTT. We group roots of phi + * such that all roots x in a group share the same x^d. + * If the roots in a group are x_1, x_2... x_d, then: + * + * N(f)(x_1^d) = f(x_1)*f(x_2)*...*f(x_d) + * + * Thus, we have: + * + * G(x_1) = f(x_2)*f(x_3)*...*f(x_d)*G'(x_1^d) + * G(x_2) = f(x_1)*f(x_3)*...*f(x_d)*G'(x_1^d) + * ... + * G(x_d) = f(x_1)*f(x_2)*...*f(x_{d-1})*G'(x_1^d) + * + * In all cases, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, in our chosen NTT representation, roots + * from the same group are consecutive in RAM. + */ + for (v = 0, x = Ft + u, y = Gt + u; v < hn; + v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + } + + /* + * Rebuild F and G with the CRT. + */ + zint_rebuild_CRT(Ft, llen, llen, n, primes, 1, t1); + zint_rebuild_CRT(Gt, llen, llen, n, primes, 1, t1); + + /* + * At that point, Ft, Gt, ft and gt are consecutive in RAM (in that + * order). 
+ */ + + /* + * Apply Babai reduction to bring back F and G to size slen. + * + * We use the FFT to compute successive approximations of the + * reduction coefficient. We first isolate the top bits of + * the coefficients of f and g, and convert them to floating + * point; with the FFT, we compute adj(f), adj(g), and + * 1/(f*adj(f)+g*adj(g)). + * + * Then, we repeatedly apply the following: + * + * - Get the top bits of the coefficients of F and G into + * floating point, and use the FFT to compute: + * (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) + * + * - Convert back that value into normal representation, and + * round it to the nearest integers, yielding a polynomial k. + * Proper scaling is applied to f, g, F and G so that the + * coefficients fit on 32 bits (signed). + * + * - Subtract k*f from F and k*g from G. + * + * Under normal conditions, this process reduces the size of F + * and G by some bits at each iteration. For constant-time + * operation, we do not want to measure the actual length of + * F and G; instead, we do the following: + * + * - f and g are converted to floating-point, with some scaling + * if necessary to keep values in the representable range. + * + * - For each iteration, we _assume_ a maximum size for F and G, + * and use the values at that size. If we overreach, then + * we get zeros, which is harmless: the resulting coefficients + * of k will be 0 and the value won't be reduced. + * + * - We conservatively assume that F and G will be reduced by + * at least 25 bits at each iteration. + * + * Even when reaching the bottom of the reduction, reduction + * coefficient will remain low. If it goes out-of-range, then + * something wrong occurred and the whole NTRU solving fails. + */ + + /* + * Memory layout: + * - We need to compute and keep adj(f), adj(g), and + * 1/(f*adj(f)+g*adj(g)) (sizes N, N and N/2 fp numbers, + * respectively). + * - At each iteration we need two extra fp buffer (N fp values), + * and produce a k (N 32-bit words). 
k will be shared with one + * of the fp buffers. + * - To compute k*f and k*g efficiently (with the NTT), we need + * some extra room; we reuse the space of the temporary buffers. + * + * Arrays of 'fpr' are obtained from the temporary array itself. + * We ensure that the base is at a properly aligned offset (the + * source array tmp[] is supposed to be already aligned). + */ + + rt3 = align_fpr(tmp, t1); + rt4 = rt3 + n; + rt5 = rt4 + n; + rt1 = rt5 + (n >> 1); + k = (int32_t *)align_u32(tmp, rt1); + rt2 = align_fpr(tmp, k + n); + if (rt2 < (rt1 + n)) { + rt2 = rt1 + n; + } + t1 = (uint32_t *)k + n; + + /* + * Get f and g into rt3 and rt4 as floating-point approximations. + * + * We need to "scale down" the floating-point representation of + * coefficients when they are too big. We want to keep the value + * below 2^310 or so. Thus, when values are larger than 10 words, + * we consider only the top 10 words. Array lengths have been + * computed so that average maximum length will fall in the + * middle or the upper half of these top 10 words. + */ + rlen = slen; + if (rlen > 10) { + rlen = 10; + } + poly_big_to_fp(rt3, ft + slen - rlen, rlen, slen, logn); + poly_big_to_fp(rt4, gt + slen - rlen, rlen, slen, logn); + + /* + * Values in rt3 and rt4 are downscaled by 2^(scale_fg). + */ + scale_fg = 31 * (int)(slen - rlen); + + /* + * Estimated boundaries for the maximum size (in bits) of the + * coefficients of (f,g). We use the measured average, and + * allow for a deviation of at most six times the standard + * deviation. + */ + minbl_fg = BITLENGTH[depth].avg - 6 * BITLENGTH[depth].std; + maxbl_fg = BITLENGTH[depth].avg + 6 * BITLENGTH[depth].std; + + /* + * Compute 1/(f*adj(f)+g*adj(g)) in rt5. We also keep adj(f) + * and adj(g) in rt3 and rt4, respectively. 
+ */ + PQCLEAN_FALCON512_CLEAN_FFT(rt3, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt4, logn); + PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(rt5, rt3, rt4, logn); + PQCLEAN_FALCON512_CLEAN_poly_adj_fft(rt3, logn); + PQCLEAN_FALCON512_CLEAN_poly_adj_fft(rt4, logn); + + /* + * Reduce F and G repeatedly. + * + * The expected maximum bit length of coefficients of F and G + * is kept in maxbl_FG, with the corresponding word length in + * FGlen. + */ + FGlen = llen; + maxbl_FG = 31 * (int)llen; + + /* + * Each reduction operation computes the reduction polynomial + * "k". We need that polynomial to have coefficients that fit + * on 32-bit signed integers, with some scaling; thus, we use + * a descending sequence of scaling values, down to zero. + * + * The size of the coefficients of k is (roughly) the difference + * between the size of the coefficients of (F,G) and the size + * of the coefficients of (f,g). Thus, the maximum size of the + * coefficients of k is, at the start, maxbl_FG - minbl_fg; + * this is our starting scale value for k. + * + * We need to estimate the size of (F,G) during the execution of + * the algorithm; we are allowed some overestimation but not too + * much (poly_big_to_fp() uses a 310-bit window). Generally + * speaking, after applying a reduction with k scaled to + * scale_k, the size of (F,G) will be size(f,g) + scale_k + dd, + * where 'dd' is a few bits to account for the fact that the + * reduction is never perfect (intuitively, dd is on the order + * of sqrt(N), so at most 5 bits; we here allow for 10 extra + * bits). + * + * The size of (f,g) is not known exactly, but maxbl_fg is an + * upper bound. + */ + scale_k = maxbl_FG - minbl_fg; + + for (;;) { + int scale_FG, dc, new_maxbl_FG; + uint32_t scl, sch; + fpr pdc, pt; + + /* + * Convert current F and G into floating-point. We apply + * scaling if the current length is more than 10 words. 
+ */ + rlen = FGlen; + if (rlen > 10) { + rlen = 10; + } + scale_FG = 31 * (int)(FGlen - rlen); + poly_big_to_fp(rt1, Ft + FGlen - rlen, rlen, llen, logn); + poly_big_to_fp(rt2, Gt + FGlen - rlen, rlen, llen, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) in rt2. + */ + PQCLEAN_FALCON512_CLEAN_FFT(rt1, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt2, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(rt1, rt3, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(rt2, rt4, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(rt2, rt1, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(rt2, rt5, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(rt2, logn); + + /* + * (f,g) are scaled by 'scale_fg', meaning that the + * numbers in rt3/rt4 should be multiplied by 2^(scale_fg) + * to have their true mathematical value. + * + * (F,G) are similarly scaled by 'scale_FG'. Therefore, + * the value we computed in rt2 is scaled by + * 'scale_FG-scale_fg'. + * + * We want that value to be scaled by 'scale_k', hence we + * apply a corrective scaling. After scaling, the values + * should fit in -2^31-1..+2^31-1. + */ + dc = scale_k - scale_FG + scale_fg; + + /* + * We will need to multiply values by 2^(-dc). The value + * 'dc' is not secret, so we can compute 2^(-dc) with a + * non-constant-time process. + * (We could use ldexp(), but we prefer to avoid any + * dependency on libm. When using FP emulation, we could + * use our fpr_ldexp(), which is constant-time.) + */ + if (dc < 0) { + dc = -dc; + pt = fpr_two; + } else { + pt = fpr_onehalf; + } + pdc = fpr_one; + while (dc != 0) { + if ((dc & 1) != 0) { + pdc = fpr_mul(pdc, pt); + } + dc >>= 1; + pt = fpr_sqr(pt); + } + + for (u = 0; u < n; u ++) { + fpr xv; + + xv = fpr_mul(rt2[u], pdc); + + /* + * Sometimes the values can be out-of-bounds if + * the algorithm fails; we must not call + * fpr_rint() (and cast to int32_t) if the value + * is not in-bounds. 
Note that the test does not + * break constant-time discipline, since any + * failure here implies that we discard the current + * secret key (f,g). + */ + if (!fpr_lt(fpr_mtwo31m1, xv) + || !fpr_lt(xv, fpr_ptwo31m1)) { + return 0; + } + k[u] = (int32_t)fpr_rint(xv); + } + + /* + * Values in k[] are integers. They really are scaled + * down by scale_k bits. + * + * If we are at low depth, then we use the NTT to + * compute k*f and k*g. + */ + sch = (uint32_t)(scale_k / 31); + scl = (uint32_t)(scale_k % 31); + if (depth <= DEPTH_INT_FG) { + poly_sub_scaled_ntt(Ft, FGlen, llen, ft, slen, slen, + k, sch, scl, logn, t1); + poly_sub_scaled_ntt(Gt, FGlen, llen, gt, slen, slen, + k, sch, scl, logn, t1); + } else { + poly_sub_scaled(Ft, FGlen, llen, ft, slen, slen, + k, sch, scl, logn); + poly_sub_scaled(Gt, FGlen, llen, gt, slen, slen, + k, sch, scl, logn); + } + + /* + * We compute the new maximum size of (F,G), assuming that + * (f,g) has _maximal_ length (i.e. that reduction is + * "late" instead of "early"). We also adjust FGlen + * accordingly. + */ + new_maxbl_FG = scale_k + maxbl_fg + 10; + if (new_maxbl_FG < maxbl_FG) { + maxbl_FG = new_maxbl_FG; + if ((int)FGlen * 31 >= maxbl_FG + 31) { + FGlen --; + } + } + + /* + * We suppose that scaling down achieves a reduction by + * at least 25 bits per iteration. We stop when we have + * done the loop with an unscaled k. + */ + if (scale_k <= 0) { + break; + } + scale_k -= 25; + if (scale_k < 0) { + scale_k = 0; + } + } + + /* + * If (F,G) length was lowered below 'slen', then we must take + * care to re-extend the sign. + */ + if (FGlen < slen) { + for (u = 0; u < n; u ++, Ft += llen, Gt += llen) { + size_t v; + uint32_t sw; + + sw = -(Ft[FGlen - 1] >> 30) >> 1; + for (v = FGlen; v < slen; v ++) { + Ft[v] = sw; + } + sw = -(Gt[FGlen - 1] >> 30) >> 1; + for (v = FGlen; v < slen; v ++) { + Gt[v] = sw; + } + } + } + + /* + * Compress encoding of all values to 'slen' words (this is the + * expected output format).
+ */ + for (u = 0, x = tmp, y = tmp; + u < (n << 1); u ++, x += slen, y += llen) { + memmove(x, y, slen * sizeof * y); + } + return 1; +} + +/* + * Solving the NTRU equation, binary case, depth = 1. Upon entry, the + * F and G from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_binary_depth1(unsigned logn_top, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + /* + * The first half of this function is a copy of the corresponding + * part in solve_NTRU_intermediate(), for the reconstruction of + * the unreduced F and G. The second half (Babai reduction) is + * done differently, because the unreduced F and G fit in 53 bits + * of precision, allowing a much simpler process with lower RAM + * usage. + */ + unsigned depth, logn; + size_t n_top, n, hn, slen, dlen, llen, u; + uint32_t *Fd, *Gd, *Ft, *Gt, *ft, *gt, *t1; + fpr *rt1, *rt2, *rt3, *rt4, *rt5, *rt6; + uint32_t *x, *y; + + depth = 1; + n_top = (size_t)1 << logn_top; + logn = logn_top - depth; + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + */ + + /* + * slen = size for our input f and g; also size of the reduced + * F and G we return (degree N) + * + * dlen = size of the F and G obtained from the deeper level + * (degree N/2) + * + * llen = size for intermediary F and G before reduction (degree N) + * + * We build our non-reduced F and G as two independent halves each, + * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1). + */ + slen = MAX_BL_SMALL[depth]; + dlen = MAX_BL_SMALL[depth + 1]; + llen = MAX_BL_LARGE[depth]; + + /* + * Fd and Gd are the F and G from the deeper level. 
Ft and Gt + * are the destination arrays for the unreduced F and G. + */ + Fd = tmp; + Gd = Fd + dlen * hn; + Ft = Gd + dlen * hn; + Gt = Ft + llen * n; + + /* + * We reduce Fd and Gd modulo all the small primes we will need, + * and store the values in Ft and Gt. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2, Rx; + size_t v; + uint32_t *xs, *ys, *xd, *yd; + + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + Rx = modp_Rx((unsigned)dlen, p, p0i, R2); + for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u; + v < hn; + v ++, xs += dlen, ys += dlen, xd += llen, yd += llen) { + *xd = zint_mod_small_signed(xs, dlen, p, p0i, R2, Rx); + *yd = zint_mod_small_signed(ys, dlen, p, p0i, R2, Rx); + } + } + + /* + * Now Fd and Gd are not needed anymore; we can squeeze them out. + */ + memmove(tmp, Ft, llen * n * sizeof(uint32_t)); + Ft = tmp; + memmove(Ft + llen * n, Gt, llen * n * sizeof(uint32_t)); + Gt = Ft + llen * n; + ft = Gt + llen * n; + gt = ft + slen * n; + + t1 = gt + slen * n; + + /* + * Compute our F and G modulo sufficiently many small primes. + */ + for (u = 0; u < llen; u ++) { + uint32_t p, p0i, R2; + uint32_t *gm, *igm, *fx, *gx, *Fp, *Gp; + unsigned e; + size_t v; + + /* + * All computations are done modulo p. + */ + p = PRIMES[u].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + /* + * We recompute things from the source f and g, of full + * degree. However, we will need only the n first elements + * of the inverse NTT table (igm); the call to modp_mkgm2() + * below will fill n_top elements in igm[] (thus overflowing + * into fx[]) but later code will overwrite these extra + * elements. + */ + gm = t1; + igm = gm + n_top; + fx = igm + n; + gx = fx + n_top; + modp_mkgm2(gm, igm, logn_top, PRIMES[u].g, p, p0i); + + /* + * Set fx and gx to f and g modulo p, respectively. + */ + for (v = 0; v < n_top; v ++) { + fx[v] = modp_set(f[v], p); + gx[v] = modp_set(g[v], p); + } + + /* + * Convert to NTT and compute our f and g.
+ */ + modp_NTT2(fx, gm, logn_top, p, p0i); + modp_NTT2(gx, gm, logn_top, p, p0i); + for (e = logn_top; e > logn; e --) { + modp_poly_rec_res(fx, e, p, p0i, R2); + modp_poly_rec_res(gx, e, p, p0i, R2); + } + + /* + * From that point onward, we only need tables for + * degree n, so we can save some space. + */ + if (depth > 0) { /* always true */ + memmove(gm + n, igm, n * sizeof * igm); + igm = gm + n; + memmove(igm + n, fx, n * sizeof * ft); + fx = igm + n; + memmove(fx + n, gx, n * sizeof * gt); + gx = fx + n; + } + + /* + * Get F' and G' modulo p and in NTT representation + * (they have degree n/2). These values were computed + * in a previous step, and stored in Ft and Gt. + */ + Fp = gx + n; + Gp = Fp + hn; + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += llen, y += llen) { + Fp[v] = *x; + Gp[v] = *y; + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Compute our F and G modulo p. + * + * Equations are: + * + * f'(x^2) = N(f)(x^2) = f * adj(f) + * g'(x^2) = N(g)(x^2) = g * adj(g) + * + * f'*G' - g'*F' = q + * + * F = F'(x^2) * adj(g) + * G = G'(x^2) * adj(f) + * + * The NTT representation of f is f(w) for all w which + * are roots of phi. In the binary case, as well as in + * the ternary case for all depth except the deepest, + * these roots can be grouped in pairs (w,-w), and we + * then have: + * + * f(w) = adj(f)(-w) + * f(-w) = adj(f)(w) + * + * and w^2 is then a root for phi at the half-degree. + * + * At the deepest level in the ternary case, this still + * holds, in the following sense: the roots of x^2-x+1 + * are (w,-w^2) (for w^3 = -1, and w != -1), and we + * have: + * + * f(w) = adj(f)(-w^2) + * f(-w^2) = adj(f)(w) + * + * In all case, we can thus compute F and G in NTT + * representation by a few simple multiplications. + * Moreover, the two roots for each pair are consecutive + * in our bit-reversal encoding. 
+ */ + for (v = 0, x = Ft + u, y = Gt + u; + v < hn; v ++, x += (llen << 1), y += (llen << 1)) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = fx[(v << 1) + 0]; + ftB = fx[(v << 1) + 1]; + gtA = gx[(v << 1) + 0]; + gtB = gx[(v << 1) + 1]; + mFp = modp_montymul(Fp[v], R2, p, p0i); + mGp = modp_montymul(Gp[v], R2, p, p0i); + x[0] = modp_montymul(gtB, mFp, p, p0i); + x[llen] = modp_montymul(gtA, mFp, p, p0i); + y[0] = modp_montymul(ftB, mGp, p, p0i); + y[llen] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2_ext(Ft + u, llen, igm, logn, p, p0i); + modp_iNTT2_ext(Gt + u, llen, igm, logn, p, p0i); + + /* + * Also save ft and gt (only up to size slen). + */ + if (u < slen) { + modp_iNTT2(fx, igm, logn, p, p0i); + modp_iNTT2(gx, igm, logn, p, p0i); + for (v = 0, x = ft + u, y = gt + u; + v < n; v ++, x += slen, y += slen) { + *x = fx[v]; + *y = gx[v]; + } + } + } + + /* + * Rebuild f, g, F and G with the CRT. Note that the elements of F + * and G are consecutive, and thus can be rebuilt in a single + * loop; similarly, the elements of f and g are consecutive. + */ + zint_rebuild_CRT(Ft, llen, llen, n << 1, PRIMES, 1, t1); + zint_rebuild_CRT(ft, slen, slen, n << 1, PRIMES, 1, t1); + + /* + * Here starts the Babai reduction, specialized for depth = 1. + * + * Candidates F and G (from Ft and Gt), and base f and g (ft and gt), + * are converted to floating point. There is no scaling, and a + * single pass is sufficient. + */ + + /* + * Convert F and G into floating point (rt1 and rt2). + */ + rt1 = align_fpr(tmp, gt + slen * n); + rt2 = rt1 + n; + poly_big_to_fp(rt1, Ft, llen, llen, logn); + poly_big_to_fp(rt2, Gt, llen, llen, logn); + + /* + * Integer representation of F and G is no longer needed, we + * can remove it. 
+ */ + memmove(tmp, ft, 2 * slen * n * sizeof * ft); + ft = tmp; + gt = ft + slen * n; + rt3 = align_fpr(tmp, gt + slen * n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * Convert f and g into floating point (rt3 and rt4). + */ + poly_big_to_fp(rt3, ft, slen, slen, logn); + poly_big_to_fp(rt4, gt, slen, slen, logn); + + /* + * Remove unneeded ft and gt. + */ + memmove(tmp, rt1, 4 * n * sizeof * rt1); + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + rt4 = rt3 + n; + + /* + * We now have: + * rt1 = F + * rt2 = G + * rt3 = f + * rt4 = g + * in that order in RAM. We convert all of them to FFT. + */ + PQCLEAN_FALCON512_CLEAN_FFT(rt1, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt2, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt3, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt4, logn); + + /* + * Compute: + * rt5 = F*adj(f) + G*adj(g) + * rt6 = 1 / (f*adj(f) + g*adj(g)) + * (Note that rt6 is half-length.) + */ + rt5 = rt4 + n; + rt6 = rt5 + n; + PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(rt5, rt1, rt2, rt3, rt4, logn); + PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(rt6, rt3, rt4, logn); + + /* + * Compute: + * rt5 = (F*adj(f)+G*adj(g)) / (f*adj(f)+g*adj(g)) + */ + PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(rt5, rt6, logn); + + /* + * Compute k as the rounded version of rt5. Check that none of + * the values is larger than 2^63-1 (in absolute value) + * because that would make the fpr_rint() do something undefined; + * note that any out-of-bounds value here implies a failure and + * (f,g) will be discarded, so we can make a simple test. + */ + PQCLEAN_FALCON512_CLEAN_iFFT(rt5, logn); + for (u = 0; u < n; u ++) { + fpr z; + + z = rt5[u]; + if (!fpr_lt(z, fpr_ptwo63m1) || !fpr_lt(fpr_mtwo63m1, z)) { + return 0; + } + rt5[u] = fpr_of(fpr_rint(z)); + } + PQCLEAN_FALCON512_CLEAN_FFT(rt5, logn); + + /* + * Subtract k*f from F, and k*g from G. 
+ */ + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(rt3, rt5, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(rt4, rt5, logn); + PQCLEAN_FALCON512_CLEAN_poly_sub(rt1, rt3, logn); + PQCLEAN_FALCON512_CLEAN_poly_sub(rt2, rt4, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(rt1, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(rt2, logn); + + /* + * Convert back F and G to integers, and return. + */ + Ft = tmp; + Gt = Ft + n; + rt3 = align_fpr(tmp, Gt + n); + memmove(rt3, rt1, 2 * n * sizeof * rt1); + rt1 = rt3; + rt2 = rt1 + n; + for (u = 0; u < n; u ++) { + Ft[u] = (uint32_t)fpr_rint(rt1[u]); + Gt[u] = (uint32_t)fpr_rint(rt2[u]); + } + + return 1; +} + +/* + * Solving the NTRU equation, top level. Upon entry, the F and G + * from the previous level should be in the tmp[] array. + * + * Returned value: 1 on success, 0 on error. + */ +static int +solve_NTRU_binary_depth0(unsigned logn, + const int8_t *f, const int8_t *g, uint32_t *tmp) { + size_t n, hn, u; + uint32_t p, p0i, R2; + uint32_t *Fp, *Gp, *t1, *t2, *t3, *t4, *t5; + uint32_t *gm, *igm, *ft, *gt; + fpr *rt2, *rt3; + + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Equations are: + * + * f' = f0^2 - X^2*f1^2 + * g' = g0^2 - X^2*g1^2 + * F' and G' are a solution to f'G' - g'F' = q (from deeper levels) + * F = F'*(g0 - X*g1) + * G = G'*(f0 - X*f1) + * + * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to + * degree N/2 (their odd-indexed coefficients are all zero). + * + * Everything should fit in 31-bit integers, hence we can just use + * the first small prime p = 2147473409. + */ + p = PRIMES[0].p; + p0i = modp_ninv31(p); + R2 = modp_R2(p, p0i); + + Fp = tmp; + Gp = Fp + hn; + ft = Gp + hn; + gt = ft + n; + gm = gt + n; + igm = gm + n; + + modp_mkgm2(gm, igm, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F' anf G' in NTT representation. 
+ */ + for (u = 0; u < hn; u ++) { + Fp[u] = modp_set(zint_one_to_plain(Fp + u), p); + Gp[u] = modp_set(zint_one_to_plain(Gp + u), p); + } + modp_NTT2(Fp, gm, logn - 1, p, p0i); + modp_NTT2(Gp, gm, logn - 1, p, p0i); + + /* + * Load f and g and convert them to NTT representation. + */ + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + + /* + * Build the unreduced F,G in ft and gt. + */ + for (u = 0; u < n; u += 2) { + uint32_t ftA, ftB, gtA, gtB; + uint32_t mFp, mGp; + + ftA = ft[u + 0]; + ftB = ft[u + 1]; + gtA = gt[u + 0]; + gtB = gt[u + 1]; + mFp = modp_montymul(Fp[u >> 1], R2, p, p0i); + mGp = modp_montymul(Gp[u >> 1], R2, p, p0i); + ft[u + 0] = modp_montymul(gtB, mFp, p, p0i); + ft[u + 1] = modp_montymul(gtA, mFp, p, p0i); + gt[u + 0] = modp_montymul(ftB, mGp, p, p0i); + gt[u + 1] = modp_montymul(ftA, mGp, p, p0i); + } + modp_iNTT2(ft, igm, logn, p, p0i); + modp_iNTT2(gt, igm, logn, p, p0i); + + Gp = Fp + n; + t1 = Gp + n; + memmove(Fp, ft, 2 * n * sizeof * ft); + + /* + * We now need to apply the Babai reduction. At that point, + * we have F and G in two n-word arrays. + * + * We can compute F*adj(f)+G*adj(g) and f*adj(f)+g*adj(g) + * modulo p, using the NTT. We still move memory around in + * order to save RAM. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + + /* + * Compute the NTT tables in t1 and t2. We do not keep t2 + * (we'll recompute it later on). + */ + modp_mkgm2(t1, t2, logn, PRIMES[0].g, p, p0i); + + /* + * Convert F and G to NTT. + */ + modp_NTT2(Fp, t1, logn, p, p0i); + modp_NTT2(Gp, t1, logn, p, p0i); + + /* + * Load f and adj(f) in t4 and t5, and convert them to NTT + * representation. 
+ */ + t4[0] = t5[0] = modp_set(f[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[n - u] = modp_set(-f[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Compute F*adj(f) in t2, and f*adj(f) in t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_montymul(w, Fp[u], p, p0i); + t3[u] = modp_montymul(w, t4[u], p, p0i); + } + + /* + * Load g and adj(g) in t4 and t5, and convert them to NTT + * representation. + */ + t4[0] = t5[0] = modp_set(g[0], p); + for (u = 1; u < n; u ++) { + t4[u] = modp_set(g[u], p); + t5[n - u] = modp_set(-g[u], p); + } + modp_NTT2(t4, t1, logn, p, p0i); + modp_NTT2(t5, t1, logn, p, p0i); + + /* + * Add G*adj(g) to t2, and g*adj(g) to t3. + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = modp_montymul(t5[u], R2, p, p0i); + t2[u] = modp_add(t2[u], + modp_montymul(w, Gp[u], p, p0i), p); + t3[u] = modp_add(t3[u], + modp_montymul(w, t4[u], p, p0i), p); + } + + /* + * Convert back t2 and t3 to normal representation (normalized + * around 0), and then + * move them to t1 and t2. We first need to recompute the + * inverse table for NTT. + */ + modp_mkgm2(t1, t4, logn, PRIMES[0].g, p, p0i); + modp_iNTT2(t2, t4, logn, p, p0i); + modp_iNTT2(t3, t4, logn, p, p0i); + for (u = 0; u < n; u ++) { + t1[u] = (uint32_t)modp_norm(t2[u], p); + t2[u] = (uint32_t)modp_norm(t3[u], p); + } + + /* + * At that point, array contents are: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * F*adj(f)+G*adj(g) (t1) + * f*adj(f)+g*adj(g) (t2) + * + * We want to divide t1 by t2. The result is not integral; it + * must be rounded. We thus need to use the FFT. + */ + + /* + * Get f*adj(f)+g*adj(g) in FFT representation. Since this + * polynomial is auto-adjoint, all its coordinates in FFT + * representation are actually real, so we can truncate off + * the imaginary parts. 
+ */ + rt3 = align_fpr(tmp, t3); + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t2)[u]); + } + PQCLEAN_FALCON512_CLEAN_FFT(rt3, logn); + rt2 = align_fpr(tmp, t2); + memmove(rt2, rt3, hn * sizeof * rt3); + + /* + * Convert F*adj(f)+G*adj(g) in FFT representation. + */ + rt3 = rt2 + hn; + for (u = 0; u < n; u ++) { + rt3[u] = fpr_of(((int32_t *)t1)[u]); + } + PQCLEAN_FALCON512_CLEAN_FFT(rt3, logn); + + /* + * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) and get + * its rounded normal representation in t1. + */ + PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft(rt3, rt2, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(rt3, logn); + for (u = 0; u < n; u ++) { + t1[u] = modp_set((int32_t)fpr_rint(rt3[u]), p); + } + + /* + * RAM contents are now: + * + * F (NTT representation) (Fp) + * G (NTT representation) (Gp) + * k (t1) + * + * We want to compute F-k*f, and G-k*g. + */ + t2 = t1 + n; + t3 = t2 + n; + t4 = t3 + n; + t5 = t4 + n; + modp_mkgm2(t2, t3, logn, PRIMES[0].g, p, p0i); + for (u = 0; u < n; u ++) { + t4[u] = modp_set(f[u], p); + t5[u] = modp_set(g[u], p); + } + modp_NTT2(t1, t2, logn, p, p0i); + modp_NTT2(t4, t2, logn, p, p0i); + modp_NTT2(t5, t2, logn, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t kw; + + kw = modp_montymul(t1[u], R2, p, p0i); + Fp[u] = modp_sub(Fp[u], + modp_montymul(kw, t4[u], p, p0i), p); + Gp[u] = modp_sub(Gp[u], + modp_montymul(kw, t5[u], p, p0i), p); + } + modp_iNTT2(Fp, t3, logn, p, p0i); + modp_iNTT2(Gp, t3, logn, p, p0i); + for (u = 0; u < n; u ++) { + Fp[u] = (uint32_t)modp_norm(Fp[u], p); + Gp[u] = (uint32_t)modp_norm(Gp[u], p); + } + + return 1; +} + +/* + * Solve the NTRU equation. Returned value is 1 on success, 0 on error. + * G can be NULL, in which case that value is computed but not returned. + * If any of the coefficients of F and G exceeds lim (in absolute value), + * then 0 is returned. 
+ */ +static int +solve_NTRU(unsigned logn, int8_t *F, int8_t *G, + const int8_t *f, const int8_t *g, int lim, uint32_t *tmp) { + size_t n, u; + uint32_t *ft, *gt, *Ft, *Gt, *gm; + uint32_t p, p0i, r; + const small_prime *primes; + + n = MKN(logn); + + if (!solve_NTRU_deepest(logn, f, g, tmp)) { + return 0; + } + + /* + * For logn <= 2, we need to use solve_NTRU_intermediate() + * directly, because coefficients are a bit too large and + * do not fit the hypotheses in solve_NTRU_binary_depth0(). + */ + if (logn <= 2) { + unsigned depth; + + depth = logn; + while (depth -- > 0) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + } else { + unsigned depth; + + depth = logn; + while (depth -- > 2) { + if (!solve_NTRU_intermediate(logn, f, g, depth, tmp)) { + return 0; + } + } + if (!solve_NTRU_binary_depth1(logn, f, g, tmp)) { + return 0; + } + if (!solve_NTRU_binary_depth0(logn, f, g, tmp)) { + return 0; + } + } + + /* + * If no buffer has been provided for G, use a temporary one. + */ + if (G == NULL) { + G = (int8_t *)(tmp + 2 * n); + } + + /* + * Final F and G are in fk->tmp, one word per coefficient + * (signed value over 31 bits). + */ + if (!poly_big_to_small(F, tmp, lim, logn) + || !poly_big_to_small(G, tmp + n, lim, logn)) { + return 0; + } + + /* + * Verify that the NTRU equation is fulfilled. Since all elements + * have short lengths, verifying modulo a small prime p works, and + * allows using the NTT. + * + * We put Gt[] first in tmp[], and process it first, so that it does + * not overlap with G[] in case we allocated it ourselves. 
+ */ + Gt = tmp; + ft = Gt + n; + gt = ft + n; + Ft = gt + n; + gm = Ft + n; + + primes = PRIMES; + p = primes[0].p; + p0i = modp_ninv31(p); + modp_mkgm2(gm, tmp, logn, primes[0].g, p, p0i); + for (u = 0; u < n; u ++) { + Gt[u] = modp_set(G[u], p); + } + for (u = 0; u < n; u ++) { + ft[u] = modp_set(f[u], p); + gt[u] = modp_set(g[u], p); + Ft[u] = modp_set(F[u], p); + } + modp_NTT2(ft, gm, logn, p, p0i); + modp_NTT2(gt, gm, logn, p, p0i); + modp_NTT2(Ft, gm, logn, p, p0i); + modp_NTT2(Gt, gm, logn, p, p0i); + r = modp_montymul(12289, 1, p, p0i); + for (u = 0; u < n; u ++) { + uint32_t z; + + z = modp_sub(modp_montymul(ft[u], Gt[u], p, p0i), + modp_montymul(gt[u], Ft[u], p, p0i), p); + if (z != r) { + return 0; + } + } + + return 1; +} + +/* + * Generate a random polynomial with a Gaussian distribution. This function + * also makes sure that the resultant of the polynomial with phi is odd. + */ +static void +poly_small_mkgauss(RNG_CONTEXT *rng, int8_t *f, unsigned logn) { + size_t n, u; + unsigned mod2; + + n = MKN(logn); + mod2 = 0; + for (u = 0; u < n; u ++) { + int s; + +restart: + s = mkgauss(rng, logn); + + /* + * We need the coefficient to fit within -127..+127; + * realistically, this is always the case except for + * the very low degrees (N = 2 or 4), for which there + * is no real security anyway. + */ + if (s < -127 || s > 127) { + goto restart; + } + + /* + * We need the sum of all coefficients to be 1; otherwise, + * the resultant of the polynomial with X^N+1 will be even, + * and the binary GCD will fail. + */ + if (u == n - 1) { + if ((mod2 ^ (unsigned)(s & 1)) == 0) { + goto restart; + } + } else { + mod2 ^= (unsigned)(s & 1); + } + f[u] = (int8_t)s; + } +} + +/* see falcon.h */ +void +PQCLEAN_FALCON512_CLEAN_keygen(inner_shake256_context *rng, + int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, + unsigned logn, uint8_t *tmp) { + /* + * Algorithm is the following: + * + * - Generate f and g with the Gaussian distribution. 
+ * + * - If either Res(f,phi) or Res(g,phi) is even, try again. + * + * - If ||(f,g)|| is too large, try again. + * + * - If ||B~_{f,g}|| is too large, try again. + * + * - If f is not invertible mod phi mod q, try again. + * + * - Compute h = g/f mod phi mod q. + * + * - Solve the NTRU equation fG - gF = q; if the solving fails, + * try again. Usual failure condition is when Res(f,phi) + * and Res(g,phi) are not prime to each other. + */ + size_t n, u; + uint16_t *h2, *tmp2; + RNG_CONTEXT *rc; + + n = MKN(logn); + rc = rng; + + /* + * We need to generate f and g randomly, until we find values + * such that the norm of (g,-f), and of the orthogonalized + * vector, are satisfying. The orthogonalized vector is: + * (q*adj(f)/(f*adj(f)+g*adj(g)), q*adj(g)/(f*adj(f)+g*adj(g))) + * (it is actually the (N+1)-th row of the Gram-Schmidt basis). + * + * In the binary case, coefficients of f and g are generated + * independently of each other, with a discrete Gaussian + * distribution of standard deviation 1.17*sqrt(q/(2*N)). Then, + * the two vectors have expected norm 1.17*sqrt(q), which is + * also our acceptance bound: we require both vectors to be no + * larger than that (this will be satisfied about 1/4th of the + * time, thus we expect sampling new (f,g) about 4 times for that + * step). + * + * We require that Res(f,phi) and Res(g,phi) are both odd (the + * NTRU equation solver requires it). + */ + for (;;) { + fpr *rt1, *rt2, *rt3; + fpr bnorm; + uint32_t normf, normg, norm; + int lim; + + /* + * The poly_small_mkgauss() function makes sure + * that the sum of coefficients is 1 modulo 2 + * (i.e. the resultant of the polynomial with phi + * will be odd). + */ + poly_small_mkgauss(rc, f, logn); + poly_small_mkgauss(rc, g, logn); + + /* + * Verify that all coefficients are within the bounds + * defined in max_fg_bits. This is the case with + * overwhelming probability; this guarantees that the + * key will be encodable with FALCON_COMP_TRIM. 
+ */ + lim = 1 << (PQCLEAN_FALCON512_CLEAN_max_fg_bits[logn] - 1); + for (u = 0; u < n; u ++) { + /* + * We can use non-CT tests since on any failure + * we will discard f and g. + */ + if (f[u] >= lim || f[u] <= -lim + || g[u] >= lim || g[u] <= -lim) { + lim = -1; + break; + } + } + if (lim < 0) { + continue; + } + + /* + * Bound is 1.17*sqrt(q). We compute the squared + * norms. With q = 12289, the squared bound is: + * (1.17^2)* 12289 = 16822.4121 + * Since f and g are integral, the squared norm + * of (g,-f) is an integer. + */ + normf = poly_small_sqnorm(f, logn); + normg = poly_small_sqnorm(g, logn); + norm = (normf + normg) | -((normf | normg) >> 31); + if (norm >= 16823) { + continue; + } + + /* + * We compute the orthogonalized vector norm. + */ + rt1 = (fpr *)tmp; + rt2 = rt1 + n; + rt3 = rt2 + n; + poly_small_to_fp(rt1, f, logn); + poly_small_to_fp(rt2, g, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt1, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rt2, logn); + PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(rt3, rt1, rt2, logn); + PQCLEAN_FALCON512_CLEAN_poly_adj_fft(rt1, logn); + PQCLEAN_FALCON512_CLEAN_poly_adj_fft(rt2, logn); + PQCLEAN_FALCON512_CLEAN_poly_mulconst(rt1, fpr_q, logn); + PQCLEAN_FALCON512_CLEAN_poly_mulconst(rt2, fpr_q, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(rt1, rt3, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(rt2, rt3, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(rt1, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(rt2, logn); + bnorm = fpr_zero; + for (u = 0; u < n; u ++) { + bnorm = fpr_add(bnorm, fpr_sqr(rt1[u])); + bnorm = fpr_add(bnorm, fpr_sqr(rt2[u])); + } + if (!fpr_lt(bnorm, fpr_bnorm_max)) { + continue; + } + + /* + * Compute public key h = g/f mod X^N+1 mod q. If this + * fails, we must restart. 
+ */ + if (h == NULL) { + h2 = (uint16_t *)tmp; + tmp2 = h2 + n; + } else { + h2 = h; + tmp2 = (uint16_t *)tmp; + } + if (!PQCLEAN_FALCON512_CLEAN_compute_public(h2, f, g, logn, (uint8_t *)tmp2)) { + continue; + } + + /* + * Solve the NTRU equation to get F and G. + */ + lim = (1 << (PQCLEAN_FALCON512_CLEAN_max_FG_bits[logn] - 1)) - 1; + if (!solve_NTRU(logn, F, G, f, g, lim, (uint32_t *)tmp)) { + continue; + } + + /* + * Key pair is generated. + */ + break; + } +} diff --git a/crypto_sign/falcon/falcon-512/clean/pqclean.c b/crypto_sign/falcon/falcon-512/clean/pqclean.c new file mode 100644 index 00000000..3abf6814 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/pqclean.c @@ -0,0 +1,384 @@ +#include "api.h" +#include "inner.h" +#include "randombytes.h" +#include +#include +/* + * Wrapper for implementing the PQClean API. + */ + + + +#define NONCELEN 40 +#define SEEDLEN 48 + +/* + * Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) + * + * private key: + * header byte: 0101nnnn + * private f (6 or 5 bits by element, depending on degree) + * private g (6 or 5 bits by element, depending on degree) + * private F (8 bits by element) + * + * public key: + * header byte: 0000nnnn + * public h (14 bits by element) + * + * signature: + * header byte: 0011nnnn + * nonce 40 bytes + * value (12 bits by element) + * + * message + signature: + * signature length (2 bytes, big-endian) + * nonce 40 bytes + * message + * header byte: 0010nnnn + * value (12 bits by element) + * (signature length is 1+len(value), not counting the nonce) + */ + +/* see api.h */ +int +PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + union { + uint8_t b[FALCON_KEYGEN_TEMP_9]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[512], g[512], F[512]; + uint16_t h[512]; + unsigned char seed[SEEDLEN]; + inner_shake256_context rng; + size_t u, v; + + /* + * Generate key pair. 
+ */ + randombytes(seed, sizeof seed); + inner_shake256_init(&rng); + inner_shake256_inject(&rng, seed, sizeof seed); + inner_shake256_flip(&rng); + PQCLEAN_FALCON512_CLEAN_keygen(&rng, f, g, F, NULL, h, 9, tmp.b); + inner_shake256_ctx_release(&rng); + + /* + * Encode private key. + */ + sk[0] = 0x50 + 9; + u = 1; + v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( + sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, + f, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( + sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, + g, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9]); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( + sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, + F, 9, PQCLEAN_FALCON512_CLEAN_max_FG_bits[9]); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES) { + return -1; + } + + /* + * Encode public key. + */ + pk[0] = 0x00 + 9; + v = PQCLEAN_FALCON512_CLEAN_modq_encode( + pk + 1, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, + h, 9); + if (v != PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + + return 0; +} + +/* + * Compute the signature. nonce[] receives the nonce and must have length + * NONCELEN bytes. sigbuf[] receives the signature value (without nonce + * or header byte), with *sigbuflen providing the maximum value length and + * receiving the actual value length. + * + * If a signature could be computed but not encoded because it would + * exceed the output buffer size, then a new signature is computed. If + * the provided buffer size is too low, this could loop indefinitely, so + * the caller must provide a size that can accommodate signatures with a + * large enough probability. + * + * Return value: 0 on success, -1 on error. 
+ */ +static int +do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + union { + uint8_t b[72 * 512]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + int8_t f[512], g[512], F[512], G[512]; + union { + int16_t sig[512]; + uint16_t hm[512]; + } r; + unsigned char seed[SEEDLEN]; + inner_shake256_context sc; + size_t u, v; + + /* + * Decode the private key. + */ + if (sk[0] != 0x50 + 9) { + return -1; + } + u = 1; + v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( + f, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9], + sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( + g, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9], + sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( + F, 9, PQCLEAN_FALCON512_CLEAN_max_FG_bits[9], + sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); + if (v == 0) { + return -1; + } + u += v; + if (u != PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES) { + return -1; + } + if (!PQCLEAN_FALCON512_CLEAN_complete_private(G, f, g, F, 9, tmp.b)) { + return -1; + } + + /* + * Create a random nonce (40 bytes). + */ + randombytes(nonce, NONCELEN); + + /* + * Hash message nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(&sc, r.hm, 9, tmp.b); + inner_shake256_ctx_release(&sc); + + /* + * Initialize a RNG. + */ + randombytes(seed, sizeof seed); + inner_shake256_init(&sc); + inner_shake256_inject(&sc, seed, sizeof seed); + inner_shake256_flip(&sc); + + /* + * Compute and return the signature. This loops until a signature + * value is found that fits in the provided buffer. 
+ */ + for (;;) { + PQCLEAN_FALCON512_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 9, tmp.b); + v = PQCLEAN_FALCON512_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 9); + if (v != 0) { + inner_shake256_ctx_release(&sc); + *sigbuflen = v; + return 0; + } + } +} + +/* + * Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] + * (of size sigbuflen) contains the signature value, not including the + * header byte or nonce. Return value is 0 on success, -1 on error. + */ +static int +do_verify( + const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + union { + uint8_t b[2 * 512]; + uint64_t dummy_u64; + fpr dummy_fpr; + } tmp; + uint16_t h[512], hm[512]; + int16_t sig[512]; + inner_shake256_context sc; + + /* + * Decode public key. + */ + if (pk[0] != 0x00 + 9) { + return -1; + } + if (PQCLEAN_FALCON512_CLEAN_modq_decode(h, 9, + pk + 1, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) + != PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { + return -1; + } + PQCLEAN_FALCON512_CLEAN_to_ntt_monty(h, 9); + + /* + * Decode signature. + */ + if (sigbuflen == 0) { + return -1; + } + if (PQCLEAN_FALCON512_CLEAN_comp_decode(sig, 9, sigbuf, sigbuflen) != sigbuflen) { + return -1; + } + + /* + * Hash nonce + message into a vector. + */ + inner_shake256_init(&sc); + inner_shake256_inject(&sc, nonce, NONCELEN); + inner_shake256_inject(&sc, m, mlen); + inner_shake256_flip(&sc); + PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(&sc, hm, 9, tmp.b); + inner_shake256_ctx_release(&sc); + + /* + * Verify signature. 
+ */ + if (!PQCLEAN_FALCON512_CLEAN_verify_raw(hm, sig, h, 9, tmp.b)) { + return -1; + } + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON512_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + /* + * The PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES constant is used for + * the signed message object (as produced by PQCLEAN_FALCON512_CLEAN_crypto_sign()) + * and includes a two-byte length value, so we take care here + * to only generate signatures that are two bytes shorter than + * the maximum. This is done to ensure that PQCLEAN_FALCON512_CLEAN_crypto_sign() + * and PQCLEAN_FALCON512_CLEAN_crypto_sign_signature() produce the exact same signature + * value, if used on the same message, with the same private key, + * and using the same output from randombytes() (this is for + * reproducibility of tests). + */ + size_t vlen; + + vlen = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { + return -1; + } + sig[0] = 0x30 + 9; + *siglen = 1 + NONCELEN + vlen; + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON512_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen < 1 + NONCELEN) { + return -1; + } + if (sig[0] != 0x30 + 9) { + return -1; + } + return do_verify(sig + 1, + sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); +} + +/* see api.h */ +int +PQCLEAN_FALCON512_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + uint8_t *pm, *sigbuf; + size_t sigbuflen; + + /* + * Move the message to its final location; this is a memmove() so + * it handles overlaps properly. 
+ */ + memmove(sm + 2 + NONCELEN, m, mlen); + pm = sm + 2 + NONCELEN; + sigbuf = pm + 1 + mlen; + sigbuflen = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES - NONCELEN - 3; + if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { + return -1; + } + pm[mlen] = 0x20 + 9; + sigbuflen ++; + sm[0] = (uint8_t)(sigbuflen >> 8); + sm[1] = (uint8_t)sigbuflen; + *smlen = mlen + 2 + NONCELEN + sigbuflen; + return 0; +} + +/* see api.h */ +int +PQCLEAN_FALCON512_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + const uint8_t *sigbuf; + size_t pmlen, sigbuflen; + + if (smlen < 3 + NONCELEN) { + return -1; + } + sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; + if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { + return -1; + } + sigbuflen --; + pmlen = smlen - NONCELEN - 3 - sigbuflen; + if (sm[2 + NONCELEN + pmlen] != 0x20 + 9) { + return -1; + } + sigbuf = sm + 2 + NONCELEN + pmlen + 1; + + /* + * The 2-byte length header and the one-byte signature header + * have been verified. Nonce is at sm+2, followed by the message + * itself. Message length is in pmlen. sigbuf/sigbuflen point to + * the signature value (excluding the header byte). + */ + if (do_verify(sm + 2, sigbuf, sigbuflen, + sm + 2 + NONCELEN, pmlen, pk) < 0) { + return -1; + } + + /* + * Signature is correct, we just have to copy/move the message + * to its final destination. The memmove() properly handles + * overlaps. + */ + memmove(m, sm + 2 + NONCELEN, pmlen); + *mlen = pmlen; + return 0; +} diff --git a/crypto_sign/falcon/falcon-512/clean/rng.c b/crypto_sign/falcon/falcon-512/clean/rng.c new file mode 100644 index 00000000..266db757 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/rng.c @@ -0,0 +1,201 @@ +#include "inner.h" +#include +/* + * PRNG and interface to the system RNG. 
/*
 * see inner.h
 *
 * This build provides no system entropy source: seed[] is never written.
 * The call reports success (1) only for an empty request (len == 0) and
 * failure (0) for any other length.
 */
int
PQCLEAN_FALCON512_CLEAN_get_seed(void *seed, size_t len) {
    (void)seed;
    return (len == 0) ? 1 : 0;
}
+ */ + uint8_t tmp[56]; + uint64_t th, tl; + int i; + + inner_shake256_extract(src, tmp, 56); + for (i = 0; i < 14; i ++) { + uint32_t w; + + w = (uint32_t)tmp[(i << 2) + 0] + | ((uint32_t)tmp[(i << 2) + 1] << 8) + | ((uint32_t)tmp[(i << 2) + 2] << 16) + | ((uint32_t)tmp[(i << 2) + 3] << 24); + *(uint32_t *)(p->state.d + (i << 2)) = w; + } + tl = *(uint32_t *)(p->state.d + 48); + th = *(uint32_t *)(p->state.d + 52); + *(uint64_t *)(p->state.d + 48) = tl + (th << 32); + PQCLEAN_FALCON512_CLEAN_prng_refill(p); +} + +/* + * PRNG based on ChaCha20. + * + * State consists in key (32 bytes) then IV (16 bytes) and block counter + * (8 bytes). Normally, we should not care about local endianness (this + * is for a PRNG), but for the NIST competition we need reproducible KAT + * vectors that work across architectures, so we enforce little-endian + * interpretation where applicable. Moreover, output words are "spread + * out" over the output buffer with the interleaving pattern that is + * naturally obtained from the AVX2 implementation that runs eight + * ChaCha20 instances in parallel. + * + * The block counter is XORed into the first 8 bytes of the IV. + */ +void +PQCLEAN_FALCON512_CLEAN_prng_refill(prng *p) { + + static const uint32_t CW[] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 + }; + + uint64_t cc; + size_t u; + + /* + * State uses local endianness. Only the output bytes must be + * converted to little endian (if used on a big-endian machine). 
+ */ + cc = *(uint64_t *)(p->state.d + 48); + for (u = 0; u < 8; u ++) { + uint32_t state[16]; + size_t v; + int i; + + memcpy(&state[0], CW, sizeof CW); + memcpy(&state[4], p->state.d, 48); + state[14] ^= (uint32_t)cc; + state[15] ^= (uint32_t)(cc >> 32); + for (i = 0; i < 10; i ++) { + +#define QROUND(a, b, c, d) do { \ + state[a] += state[b]; \ + state[d] ^= state[a]; \ + state[d] = (state[d] << 16) | (state[d] >> 16); \ + state[c] += state[d]; \ + state[b] ^= state[c]; \ + state[b] = (state[b] << 12) | (state[b] >> 20); \ + state[a] += state[b]; \ + state[d] ^= state[a]; \ + state[d] = (state[d] << 8) | (state[d] >> 24); \ + state[c] += state[d]; \ + state[b] ^= state[c]; \ + state[b] = (state[b] << 7) | (state[b] >> 25); \ + } while (0) + + QROUND( 0, 4, 8, 12); + QROUND( 1, 5, 9, 13); + QROUND( 2, 6, 10, 14); + QROUND( 3, 7, 11, 15); + QROUND( 0, 5, 10, 15); + QROUND( 1, 6, 11, 12); + QROUND( 2, 7, 8, 13); + QROUND( 3, 4, 9, 14); + +#undef QROUND + + } + + for (v = 0; v < 4; v ++) { + state[v] += CW[v]; + } + for (v = 4; v < 14; v ++) { + state[v] += ((uint32_t *)p->state.d)[v - 4]; + } + state[14] += ((uint32_t *)p->state.d)[10] + ^ (uint32_t)cc; + state[15] += ((uint32_t *)p->state.d)[11] + ^ (uint32_t)(cc >> 32); + cc ++; + + /* + * We mimic the interleaving that is used in the AVX2 + * implementation. 
+ */ + for (v = 0; v < 16; v ++) { + p->buf.d[(u << 2) + (v << 5) + 0] = + (uint8_t)state[v]; + p->buf.d[(u << 2) + (v << 5) + 1] = + (uint8_t)(state[v] >> 8); + p->buf.d[(u << 2) + (v << 5) + 2] = + (uint8_t)(state[v] >> 16); + p->buf.d[(u << 2) + (v << 5) + 3] = + (uint8_t)(state[v] >> 24); + } + } + *(uint64_t *)(p->state.d + 48) = cc; + + + p->ptr = 0; +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { + uint8_t *buf; + + buf = dst; + while (len > 0) { + size_t clen; + + clen = (sizeof p->buf.d) - p->ptr; + if (clen > len) { + clen = len; + } + memcpy(buf, p->buf.d, clen); + buf += clen; + len -= clen; + p->ptr += clen; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON512_CLEAN_prng_refill(p); + } + } +} diff --git a/crypto_sign/falcon/falcon-512/clean/sign.c b/crypto_sign/falcon/falcon-512/clean/sign.c new file mode 100644 index 00000000..87566d98 --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/sign.c @@ -0,0 +1,1254 @@ +#include "inner.h" + +/* + * Falcon signature generation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* =================================================================== */ + +/* + * Compute degree N from logarithm 'logn'. + */ +#define MKN(logn) ((size_t)1 << (logn)) + +/* =================================================================== */ +/* + * Binary case: + * N = 2^logn + * phi = X^N+1 + */ + +/* + * Get the size of the LDL tree for an input with polynomials of size + * 2^logn. The size is expressed in the number of elements. + */ +static inline unsigned +ffLDL_treesize(unsigned logn) { + /* + * For logn = 0 (polynomials are constant), the "tree" is a + * single element. Otherwise, the tree node has size 2^logn, and + * has two child trees for size logn-1 each. Thus, treesize s() + * must fulfill these two relations: + * + * s(0) = 1 + * s(logn) = (2^logn) + 2*s(logn-1) + */ + return (logn + 1) << logn; +} + +/* + * Inner function for ffLDL_fft(). It expects the matrix to be both + * auto-adjoint and quasicyclic; also, it uses the source operands + * as modifiable temporaries. + * + * tmp[] must have room for at least one polynomial. + */ +static void +ffLDL_fft_inner(fpr *tree, + fpr *g0, fpr *g1, unsigned logn, fpr *tmp) { + size_t n, hn; + + n = MKN(logn); + if (n == 1) { + tree[0] = g0[0]; + return; + } + hn = n >> 1; + + /* + * The LDL decomposition yields L (which is written in the tree) + * and the diagonal of D. 
Since d00 = g0, we just write d11 + * into tmp. + */ + PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft(tmp, tree, g0, g1, g0, logn); + + /* + * Split d00 (currently in g0) and d11 (currently in tmp). We + * reuse g0 and g1 as temporary storage spaces: + * d00 splits into g1, g1+hn + * d11 splits into g0, g0+hn + */ + PQCLEAN_FALCON512_CLEAN_poly_split_fft(g1, g1 + hn, g0, logn); + PQCLEAN_FALCON512_CLEAN_poly_split_fft(g0, g0 + hn, tmp, logn); + + /* + * Each split result is the first row of a new auto-adjoint + * quasicyclic matrix for the next recursive step. + */ + ffLDL_fft_inner(tree + n, + g1, g1 + hn, logn - 1, tmp); + ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1), + g0, g0 + hn, logn - 1, tmp); +} + +/* + * Compute the ffLDL tree of an auto-adjoint matrix G. The matrix + * is provided as three polynomials (FFT representation). + * + * The "tree" array is filled with the computed tree, of size + * (logn+1)*(2^logn) elements (see ffLDL_treesize()). + * + * Input arrays MUST NOT overlap, except possibly the three unmodified + * arrays g00, g01 and g11. tmp[] should have room for at least three + * polynomials of 2^logn elements each. + */ +static void +ffLDL_fft(fpr *tree, const fpr *g00, + const fpr *g01, const fpr *g11, + unsigned logn, fpr *tmp) { + size_t n, hn; + fpr *d00, *d11; + + n = MKN(logn); + if (n == 1) { + tree[0] = g00[0]; + return; + } + hn = n >> 1; + d00 = tmp; + d11 = tmp + n; + tmp += n << 1; + + memcpy(d00, g00, n * sizeof * g00); + PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft(d11, tree, g00, g01, g11, logn); + + PQCLEAN_FALCON512_CLEAN_poly_split_fft(tmp, tmp + hn, d00, logn); + PQCLEAN_FALCON512_CLEAN_poly_split_fft(d00, d00 + hn, d11, logn); + memcpy(d11, tmp, n * sizeof * tmp); + ffLDL_fft_inner(tree + n, + d11, d11 + hn, logn - 1, tmp); + ffLDL_fft_inner(tree + n + ffLDL_treesize(logn - 1), + d00, d00 + hn, logn - 1, tmp); +} + +/* + * Normalize an ffLDL tree: each leaf of value x is replaced with + * sigma / sqrt(x). 
+ */ +static void +ffLDL_binary_normalize(fpr *tree, unsigned logn) { + /* + * TODO: make an iterative version. + */ + size_t n; + + n = MKN(logn); + if (n == 1) { + /* + * We actually store in the tree leaf the inverse of + * the value mandated by the specification: this + * saves a division both here and in the sampler. + */ + tree[0] = fpr_mul(fpr_sqrt(tree[0]), fpr_inv_sigma); + } else { + ffLDL_binary_normalize(tree + n, logn - 1); + ffLDL_binary_normalize(tree + n + ffLDL_treesize(logn - 1), + logn - 1); + } +} + +/* =================================================================== */ + +/* + * Convert an integer polynomial (with small values) into the + * representation with complex numbers. + */ +static void +smallints_to_fpr(fpr *r, const int8_t *t, unsigned logn) { + size_t n, u; + + n = MKN(logn); + for (u = 0; u < n; u ++) { + r[u] = fpr_of(t[u]); + } +} + +/* + * The expanded private key contains: + * - The B0 matrix (four elements) + * - The ffLDL tree + */ + +static inline size_t +skoff_b00(unsigned logn) { + (void)logn; + return 0; +} + +static inline size_t +skoff_b01(unsigned logn) { + return MKN(logn); +} + +static inline size_t +skoff_b10(unsigned logn) { + return 2 * MKN(logn); +} + +static inline size_t +skoff_b11(unsigned logn) { + return 3 * MKN(logn); +} + +static inline size_t +skoff_tree(unsigned logn) { + return 4 * MKN(logn); +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *expanded_key, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + unsigned logn, uint8_t *tmp) { + size_t n; + fpr *rf, *rg, *rF, *rG; + fpr *b00, *b01, *b10, *b11; + fpr *g00, *g01, *g11, *gxx; + fpr *tree; + + n = MKN(logn); + b00 = expanded_key + skoff_b00(logn); + b01 = expanded_key + skoff_b01(logn); + b10 = expanded_key + skoff_b10(logn); + b11 = expanded_key + skoff_b11(logn); + tree = expanded_key + skoff_tree(logn); + + /* + * We load the private key elements directly into the B0 matrix, + * since B0 = [[g, 
-f], [G, -F]]. + */ + rf = b01; + rg = b00; + rF = b11; + rG = b10; + + smallints_to_fpr(rf, f, logn); + smallints_to_fpr(rg, g, logn); + smallints_to_fpr(rF, F, logn); + smallints_to_fpr(rG, G, logn); + + /* + * Compute the FFT for the key elements, and negate f and F. + */ + PQCLEAN_FALCON512_CLEAN_FFT(rf, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rg, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rF, logn); + PQCLEAN_FALCON512_CLEAN_FFT(rG, logn); + PQCLEAN_FALCON512_CLEAN_poly_neg(rf, logn); + PQCLEAN_FALCON512_CLEAN_poly_neg(rF, logn); + + /* + * The Gram matrix is G = B·B*. Formulas are: + * g00 = b00*adj(b00) + b01*adj(b01) + * g01 = b00*adj(b10) + b01*adj(b11) + * g10 = b10*adj(b00) + b11*adj(b01) + * g11 = b10*adj(b10) + b11*adj(b11) + * + * For historical reasons, this implementation uses + * g00, g01 and g11 (upper triangle). + */ + g00 = (fpr *)tmp; + g01 = g00 + n; + g11 = g01 + n; + gxx = g11 + n; + + memcpy(g00, b00, n * sizeof * b00); + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(g00, logn); + memcpy(gxx, b01, n * sizeof * b01); + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(gxx, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(g00, gxx, logn); + + memcpy(g01, b00, n * sizeof * b00); + PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(g01, b10, logn); + memcpy(gxx, b01, n * sizeof * b01); + PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(gxx, b11, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(g01, gxx, logn); + + memcpy(g11, b10, n * sizeof * b10); + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(g11, logn); + memcpy(gxx, b11, n * sizeof * b11); + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(gxx, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(g11, gxx, logn); + + /* + * Compute the Falcon tree. + */ + ffLDL_fft(tree, g00, g01, g11, logn, gxx); + + /* + * Normalize tree. + */ + ffLDL_binary_normalize(tree, logn); +} + +typedef int (*samplerZ)(void *ctx, fpr mu, fpr sigma); + +/* + * Perform Fast Fourier Sampling for target vector t. The Gram matrix + * is provided (G = [[g00, g01], [adj(g01), g11]]). 
The sampled vector + * is written over (t0,t1). The Gram matrix is modified as well. The + * tmp[] buffer must have room for four polynomials. + */ +static void +ffSampling_fft_dyntree(samplerZ samp, void *samp_ctx, + fpr *t0, fpr *t1, + fpr *g00, fpr *g01, fpr *g11, + unsigned logn, fpr *tmp) { + size_t n, hn; + fpr *z0, *z1; + + /* + * Deepest level: the LDL tree leaf value is just g00 (the + * array has length only 1 at this point); we normalize it + * with regards to sigma, then use it for sampling. + */ + if (logn == 0) { + fpr leaf; + + leaf = g00[0]; + leaf = fpr_mul(fpr_sqrt(leaf), fpr_inv_sigma); + t0[0] = fpr_of(samp(samp_ctx, t0[0], leaf)); + t1[0] = fpr_of(samp(samp_ctx, t1[0], leaf)); + return; + } + + n = (size_t)1 << logn; + hn = n >> 1; + + /* + * Decompose G into LDL. We only need d00 (identical to g00), + * d11, and l10; we do that in place. + */ + PQCLEAN_FALCON512_CLEAN_poly_LDL_fft(g00, g01, g11, logn); + + /* + * Split d00 and d11 and expand them into half-size quasi-cyclic + * Gram matrices. We also save l10 in tmp[]. + */ + PQCLEAN_FALCON512_CLEAN_poly_split_fft(tmp, tmp + hn, g00, logn); + memcpy(g00, tmp, n * sizeof * tmp); + PQCLEAN_FALCON512_CLEAN_poly_split_fft(tmp, tmp + hn, g11, logn); + memcpy(g11, tmp, n * sizeof * tmp); + memcpy(tmp, g01, n * sizeof * g01); + memcpy(g01, g00, hn * sizeof * g00); + memcpy(g01 + hn, g11, hn * sizeof * g00); + + /* + * The half-size Gram matrices for the recursive LDL tree + * building are now: + * - left sub-tree: g00, g00+hn, g01 + * - right sub-tree: g11, g11+hn, g01+hn + * l10 is in tmp[]. + */ + + /* + * We split t1 and use the first recursive call on the two + * halves, using the right sub-tree. The result is merged + * back into tmp + 2*n. 
+ */ + z1 = tmp + n; + PQCLEAN_FALCON512_CLEAN_poly_split_fft(z1, z1 + hn, t1, logn); + ffSampling_fft_dyntree(samp, samp_ctx, z1, z1 + hn, + g11, g11 + hn, g01 + hn, logn - 1, z1 + n); + PQCLEAN_FALCON512_CLEAN_poly_merge_fft(tmp + (n << 1), z1, z1 + hn, logn); + + /* + * Compute tb0 = t0 + (t1 - z1) * l10. + * At that point, l10 is in tmp, t1 is unmodified, and z1 is + * in tmp + (n << 1). The buffer in z1 is free. + * + * In the end, z1 is written over t1, and tb0 is in t0. + */ + memcpy(z1, t1, n * sizeof * t1); + PQCLEAN_FALCON512_CLEAN_poly_sub(z1, tmp + (n << 1), logn); + memcpy(t1, tmp + (n << 1), n * sizeof * tmp); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(tmp, z1, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(t0, tmp, logn); + + /* + * Second recursive invocation, on the split tb0 (currently in t0) + * and the left sub-tree. + */ + z0 = tmp; + PQCLEAN_FALCON512_CLEAN_poly_split_fft(z0, z0 + hn, t0, logn); + ffSampling_fft_dyntree(samp, samp_ctx, z0, z0 + hn, + g00, g00 + hn, g01, logn - 1, z0 + n); + PQCLEAN_FALCON512_CLEAN_poly_merge_fft(t0, z0, z0 + hn, logn); +} + +/* + * Perform Fast Fourier Sampling for target vector t and LDL tree T. + * tmp[] must have size for at least two polynomials of size 2^logn. + */ +static void +ffSampling_fft(samplerZ samp, void *samp_ctx, + fpr *z0, fpr *z1, + const fpr *tree, + const fpr *t0, const fpr *t1, unsigned logn, + fpr *tmp) { + size_t n, hn; + const fpr *tree0, *tree1; + + /* + * When logn == 2, we inline the last two recursion levels. + */ + if (logn == 2) { + fpr x0, x1, y0, y1, w0, w1, w2, w3, sigma; + fpr a_re, a_im, b_re, b_im, c_re, c_im; + + tree0 = tree + 4; + tree1 = tree + 8; + + /* + * We split t1 into w*, then do the recursive invocation, + * with output in w*. We finally merge back into z1. 
+ */ + a_re = t1[0]; + a_im = t1[2]; + b_re = t1[1]; + b_im = t1[3]; + c_re = fpr_add(a_re, b_re); + c_im = fpr_add(a_im, b_im); + w0 = fpr_half(c_re); + w1 = fpr_half(c_im); + c_re = fpr_sub(a_re, b_re); + c_im = fpr_sub(a_im, b_im); + w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8); + w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8); + + x0 = w2; + x1 = w3; + sigma = tree1[3]; + w2 = fpr_of(samp(samp_ctx, x0, sigma)); + w3 = fpr_of(samp(samp_ctx, x1, sigma)); + a_re = fpr_sub(x0, w2); + a_im = fpr_sub(x1, w3); + b_re = tree1[0]; + b_im = tree1[1]; + c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im)); + c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re)); + x0 = fpr_add(c_re, w0); + x1 = fpr_add(c_im, w1); + sigma = tree1[2]; + w0 = fpr_of(samp(samp_ctx, x0, sigma)); + w1 = fpr_of(samp(samp_ctx, x1, sigma)); + + a_re = w0; + a_im = w1; + b_re = w2; + b_im = w3; + c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2); + c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2); + z1[0] = w0 = fpr_add(a_re, c_re); + z1[2] = w2 = fpr_add(a_im, c_im); + z1[1] = w1 = fpr_sub(a_re, c_re); + z1[3] = w3 = fpr_sub(a_im, c_im); + + /* + * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*. + */ + w0 = fpr_sub(t1[0], w0); + w1 = fpr_sub(t1[1], w1); + w2 = fpr_sub(t1[2], w2); + w3 = fpr_sub(t1[3], w3); + + a_re = w0; + a_im = w2; + b_re = tree[0]; + b_im = tree[2]; + w0 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im)); + w2 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re)); + a_re = w1; + a_im = w3; + b_re = tree[1]; + b_im = tree[3]; + w1 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im)); + w3 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re)); + + w0 = fpr_add(w0, t0[0]); + w1 = fpr_add(w1, t0[1]); + w2 = fpr_add(w2, t0[2]); + w3 = fpr_add(w3, t0[3]); + + /* + * Second recursive invocation. 
+ */ + a_re = w0; + a_im = w2; + b_re = w1; + b_im = w3; + c_re = fpr_add(a_re, b_re); + c_im = fpr_add(a_im, b_im); + w0 = fpr_half(c_re); + w1 = fpr_half(c_im); + c_re = fpr_sub(a_re, b_re); + c_im = fpr_sub(a_im, b_im); + w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8); + w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8); + + x0 = w2; + x1 = w3; + sigma = tree0[3]; + w2 = y0 = fpr_of(samp(samp_ctx, x0, sigma)); + w3 = y1 = fpr_of(samp(samp_ctx, x1, sigma)); + a_re = fpr_sub(x0, y0); + a_im = fpr_sub(x1, y1); + b_re = tree0[0]; + b_im = tree0[1]; + c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im)); + c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re)); + x0 = fpr_add(c_re, w0); + x1 = fpr_add(c_im, w1); + sigma = tree0[2]; + w0 = fpr_of(samp(samp_ctx, x0, sigma)); + w1 = fpr_of(samp(samp_ctx, x1, sigma)); + + a_re = w0; + a_im = w1; + b_re = w2; + b_im = w3; + c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2); + c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2); + z0[0] = fpr_add(a_re, c_re); + z0[2] = fpr_add(a_im, c_im); + z0[1] = fpr_sub(a_re, c_re); + z0[3] = fpr_sub(a_im, c_im); + + return; + } + + /* + * Case logn == 1 is reachable only when using Falcon-2 (the + * smallest size for which Falcon is mathematically defined, but + * of course way too insecure to be of any use). 
+ */ + if (logn == 1) { + fpr x0, x1, y0, y1, sigma; + fpr a_re, a_im, b_re, b_im, c_re, c_im; + + x0 = t1[0]; + x1 = t1[1]; + sigma = tree[3]; + z1[0] = y0 = fpr_of(samp(samp_ctx, x0, sigma)); + z1[1] = y1 = fpr_of(samp(samp_ctx, x1, sigma)); + a_re = fpr_sub(x0, y0); + a_im = fpr_sub(x1, y1); + b_re = tree[0]; + b_im = tree[1]; + c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im)); + c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re)); + x0 = fpr_add(c_re, t0[0]); + x1 = fpr_add(c_im, t0[1]); + sigma = tree[2]; + z0[0] = fpr_of(samp(samp_ctx, x0, sigma)); + z0[1] = fpr_of(samp(samp_ctx, x1, sigma)); + + return; + } + + /* + * Normal end of recursion is for logn == 0. Since the last + * steps of the recursions were inlined in the blocks above + * (when logn == 1 or 2), this case is not reachable, and is + * retained here only for documentation purposes. + + if (logn == 0) { + fpr x0, x1, sigma; + + x0 = t0[0]; + x1 = t1[0]; + sigma = tree[0]; + z0[0] = fpr_of(samp(samp_ctx, x0, sigma)); + z1[0] = fpr_of(samp(samp_ctx, x1, sigma)); + return; + } + + */ + + /* + * General recursive case (logn >= 3). + */ + + n = (size_t)1 << logn; + hn = n >> 1; + tree0 = tree + n; + tree1 = tree + n + ffLDL_treesize(logn - 1); + + /* + * We split t1 into z1 (reused as temporary storage), then do + * the recursive invocation, with output in tmp. We finally + * merge back into z1. + */ + PQCLEAN_FALCON512_CLEAN_poly_split_fft(z1, z1 + hn, t1, logn); + ffSampling_fft(samp, samp_ctx, tmp, tmp + hn, + tree1, z1, z1 + hn, logn - 1, tmp + n); + PQCLEAN_FALCON512_CLEAN_poly_merge_fft(z1, tmp, tmp + hn, logn); + + /* + * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in tmp[]. + */ + memcpy(tmp, t1, n * sizeof * t1); + PQCLEAN_FALCON512_CLEAN_poly_sub(tmp, z1, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(tmp, tree, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(tmp, t0, logn); + + /* + * Second recursive invocation. 
+ */ + PQCLEAN_FALCON512_CLEAN_poly_split_fft(z0, z0 + hn, tmp, logn); + ffSampling_fft(samp, samp_ctx, tmp, tmp + hn, + tree0, z0, z0 + hn, logn - 1, tmp + n); + PQCLEAN_FALCON512_CLEAN_poly_merge_fft(z0, tmp, tmp + hn, logn); +} + +/* + * Compute a signature: the signature contains two vectors, s1 and s2. + * The s1 vector is not returned. The squared norm of (s1,s2) is + * computed, and if it is short enough, then s2 is returned into the + * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is + * returned; the caller should then try again. This function uses an + * expanded key. + * + * tmp[] must have room for at least six polynomials. + */ +static int +do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2, + const fpr *expanded_key, + const uint16_t *hm, + unsigned logn, fpr *tmp) { + size_t n, u; + fpr *t0, *t1, *tx, *ty; + const fpr *b00, *b01, *b10, *b11, *tree; + fpr ni; + uint32_t sqn, ng; + int16_t *s1tmp, *s2tmp; + + n = MKN(logn); + t0 = tmp; + t1 = t0 + n; + b00 = expanded_key + skoff_b00(logn); + b01 = expanded_key + skoff_b01(logn); + b10 = expanded_key + skoff_b10(logn); + b11 = expanded_key + skoff_b11(logn); + tree = expanded_key + skoff_tree(logn); + + /* + * Set the target vector to [hm, 0] (hm is the hashed message). + */ + for (u = 0; u < n; u ++) { + t0[u] = fpr_of(hm[u]); + /* This is implicit. + t1[u] = fpr_zero; + */ + } + + /* + * Apply the lattice basis to obtain the real target + * vector (after normalization with regards to modulus). + */ + PQCLEAN_FALCON512_CLEAN_FFT(t0, logn); + ni = fpr_inverse_of_q; + memcpy(t1, t0, n * sizeof * t0); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(t1, b01, logn); + PQCLEAN_FALCON512_CLEAN_poly_mulconst(t1, fpr_neg(ni), logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(t0, b11, logn); + PQCLEAN_FALCON512_CLEAN_poly_mulconst(t0, ni, logn); + + tx = t1 + n; + ty = tx + n; + + /* + * Apply sampling. Output is written back in [tx, ty]. 
+ */ + ffSampling_fft(samp, samp_ctx, tx, ty, tree, t0, t1, logn, ty + n); + + /* + * Get the lattice point corresponding to that tiny vector. + */ + memcpy(t0, tx, n * sizeof * tx); + memcpy(t1, ty, n * sizeof * ty); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(tx, b00, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(ty, b10, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(tx, ty, logn); + memcpy(ty, t0, n * sizeof * t0); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(ty, b01, logn); + + memcpy(t0, tx, n * sizeof * tx); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(t1, b11, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(t1, ty, logn); + + PQCLEAN_FALCON512_CLEAN_iFFT(t0, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(t1, logn); + + /* + * Compute the signature. + */ + s1tmp = (int16_t *)tx; + sqn = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]); + sqn += (uint32_t)(z * z); + ng |= sqn; + s1tmp[u] = (int16_t)z; + } + sqn |= -(ng >> 31); + + /* + * With "normal" degrees (e.g. 512 or 1024), it is very + * improbable that the computed vector is not short enough; + * however, it may happen in practice for the very reduced + * versions (e.g. degree 16 or below). In that case, the caller + * will loop, and we must not write anything into s2[] because + * s2[] may overlap with the hashed message hm[] and we need + * hm[] for the next iteration. + */ + s2tmp = (int16_t *)tmp; + for (u = 0; u < n; u ++) { + s2tmp[u] = (int16_t) - fpr_rint(t1[u]); + } + if (PQCLEAN_FALCON512_CLEAN_is_short_half(sqn, s2tmp, logn)) { + memcpy(s2, s2tmp, n * sizeof * s2); + memcpy(tmp, s1tmp, n * sizeof * s1tmp); + return 1; + } + return 0; +} + +/* + * Compute a signature: the signature contains two vectors, s1 and s2. + * The s1 vector is not returned. The squared norm of (s1,s2) is + * computed, and if it is short enough, then s2 is returned into the + * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is + * returned; the caller should then try again. 
+ * + * tmp[] must have room for at least nine polynomials. + */ +static int +do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, fpr *tmp) { + size_t n, u; + fpr *t0, *t1, *tx, *ty; + fpr *b00, *b01, *b10, *b11, *g00, *g01, *g11; + fpr ni; + uint32_t sqn, ng; + int16_t *s1tmp, *s2tmp; + + n = MKN(logn); + + /* + * Lattice basis is B = [[g, -f], [G, -F]]. We convert it to FFT. + */ + b00 = tmp; + b01 = b00 + n; + b10 = b01 + n; + b11 = b10 + n; + smallints_to_fpr(b01, f, logn); + smallints_to_fpr(b00, g, logn); + smallints_to_fpr(b11, F, logn); + smallints_to_fpr(b10, G, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b01, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b00, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b11, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b10, logn); + PQCLEAN_FALCON512_CLEAN_poly_neg(b01, logn); + PQCLEAN_FALCON512_CLEAN_poly_neg(b11, logn); + + /* + * Compute the Gram matrix G = B·B*. Formulas are: + * g00 = b00*adj(b00) + b01*adj(b01) + * g01 = b00*adj(b10) + b01*adj(b11) + * g10 = b10*adj(b00) + b11*adj(b01) + * g11 = b10*adj(b10) + b11*adj(b11) + * + * For historical reasons, this implementation uses + * g00, g01 and g11 (upper triangle). g10 is not kept + * since it is equal to adj(g01). + * + * We _replace_ the matrix B with the Gram matrix, but we + * must keep b01 and b11 for computing the target vector. 
+ */ + t0 = b11 + n; + t1 = t0 + n; + + memcpy(t0, b01, n * sizeof * b01); + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(t0, logn); // t0 <- b01*adj(b01) + + memcpy(t1, b00, n * sizeof * b00); + PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(t1, b10, logn); // t1 <- b00*adj(b10) + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(b00, logn); // b00 <- b00*adj(b00) + PQCLEAN_FALCON512_CLEAN_poly_add(b00, t0, logn); // b00 <- g00 + memcpy(t0, b01, n * sizeof * b01); + PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(b01, b11, logn); // b01 <- b01*adj(b11) + PQCLEAN_FALCON512_CLEAN_poly_add(b01, t1, logn); // b01 <- g01 + + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(b10, logn); // b10 <- b10*adj(b10) + memcpy(t1, b11, n * sizeof * b11); + PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(t1, logn); // t1 <- b11*adj(b11) + PQCLEAN_FALCON512_CLEAN_poly_add(b10, t1, logn); // b10 <- g11 + + /* + * We rename variables to make things clearer. The three elements + * of the Gram matrix uses the first 3*n slots of tmp[], followed + * by b11 and b01 (in that order). + */ + g00 = b00; + g01 = b01; + g11 = b10; + b01 = t0; + t0 = b01 + n; + t1 = t0 + n; + + /* + * Memory layout at that point: + * g00 g01 g11 b11 b01 t0 t1 + */ + + /* + * Set the target vector to [hm, 0] (hm is the hashed message). + */ + for (u = 0; u < n; u ++) { + t0[u] = fpr_of(hm[u]); + /* This is implicit. + t1[u] = fpr_zero; + */ + } + + /* + * Apply the lattice basis to obtain the real target + * vector (after normalization with regards to modulus). + */ + PQCLEAN_FALCON512_CLEAN_FFT(t0, logn); + ni = fpr_inverse_of_q; + memcpy(t1, t0, n * sizeof * t0); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(t1, b01, logn); + PQCLEAN_FALCON512_CLEAN_poly_mulconst(t1, fpr_neg(ni), logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(t0, b11, logn); + PQCLEAN_FALCON512_CLEAN_poly_mulconst(t0, ni, logn); + + /* + * b01 and b11 can be discarded, so we move back (t0,t1). 
+ * Memory layout is now: + * g00 g01 g11 t0 t1 + */ + memcpy(b11, t0, n * 2 * sizeof * t0); + t0 = g11 + n; + t1 = t0 + n; + + /* + * Apply sampling; result is written over (t0,t1). + */ + ffSampling_fft_dyntree(samp, samp_ctx, + t0, t1, g00, g01, g11, logn, t1 + n); + + /* + * We arrange the layout back to: + * b00 b01 b10 b11 t0 t1 + * + * We did not conserve the matrix basis, so we must recompute + * it now. + */ + b00 = tmp; + b01 = b00 + n; + b10 = b01 + n; + b11 = b10 + n; + memmove(b11 + n, t0, n * 2 * sizeof * t0); + t0 = b11 + n; + t1 = t0 + n; + smallints_to_fpr(b01, f, logn); + smallints_to_fpr(b00, g, logn); + smallints_to_fpr(b11, F, logn); + smallints_to_fpr(b10, G, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b01, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b00, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b11, logn); + PQCLEAN_FALCON512_CLEAN_FFT(b10, logn); + PQCLEAN_FALCON512_CLEAN_poly_neg(b01, logn); + PQCLEAN_FALCON512_CLEAN_poly_neg(b11, logn); + tx = t1 + n; + ty = tx + n; + + /* + * Get the lattice point corresponding to that tiny vector. + */ + memcpy(tx, t0, n * sizeof * t0); + memcpy(ty, t1, n * sizeof * t1); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(tx, b00, logn); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(ty, b10, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(tx, ty, logn); + memcpy(ty, t0, n * sizeof * t0); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(ty, b01, logn); + + memcpy(t0, tx, n * sizeof * tx); + PQCLEAN_FALCON512_CLEAN_poly_mul_fft(t1, b11, logn); + PQCLEAN_FALCON512_CLEAN_poly_add(t1, ty, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(t0, logn); + PQCLEAN_FALCON512_CLEAN_iFFT(t1, logn); + + s1tmp = (int16_t *)tx; + sqn = 0; + ng = 0; + for (u = 0; u < n; u ++) { + int32_t z; + + z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]); + sqn += (uint32_t)(z * z); + ng |= sqn; + s1tmp[u] = (int16_t)z; + } + sqn |= -(ng >> 31); + + /* + * With "normal" degrees (e.g. 
512 or 1024), it is very + * improbable that the computed vector is not short enough; + * however, it may happen in practice for the very reduced + * versions (e.g. degree 16 or below). In that case, the caller + * will loop, and we must not write anything into s2[] because + * s2[] may overlap with the hashed message hm[] and we need + * hm[] for the next iteration. + */ + s2tmp = (int16_t *)tmp; + for (u = 0; u < n; u ++) { + s2tmp[u] = (int16_t) - fpr_rint(t1[u]); + } + if (PQCLEAN_FALCON512_CLEAN_is_short_half(sqn, s2tmp, logn)) { + memcpy(s2, s2tmp, n * sizeof * s2); + memcpy(tmp, s1tmp, n * sizeof * s1tmp); + return 1; + } + return 0; +} + +/* + * Sample an integer value along a half-gaussian distribution centered + * on zero and standard deviation 1.8205, with a precision of 72 bits. + */ +int +PQCLEAN_FALCON512_CLEAN_gaussian0_sampler(prng *p) { + + static const uint32_t dist[] = { + 10745844u, 3068844u, 3741698u, + 5559083u, 1580863u, 8248194u, + 2260429u, 13669192u, 2736639u, + 708981u, 4421575u, 10046180u, + 169348u, 7122675u, 4136815u, + 30538u, 13063405u, 7650655u, + 4132u, 14505003u, 7826148u, + 417u, 16768101u, 11363290u, + 31u, 8444042u, 8086568u, + 1u, 12844466u, 265321u, + 0u, 1232676u, 13644283u, + 0u, 38047u, 9111839u, + 0u, 870u, 6138264u, + 0u, 14u, 12545723u, + 0u, 0u, 3104126u, + 0u, 0u, 28824u, + 0u, 0u, 198u, + 0u, 0u, 1u + }; + + uint32_t v0, v1, v2, hi; + uint64_t lo; + size_t u; + int z; + + /* + * Get a random 72-bit value, into three 24-bit limbs v0..v2. + */ + lo = prng_get_u64(p); + hi = prng_get_u8(p); + v0 = (uint32_t)lo & 0xFFFFFF; + v1 = (uint32_t)(lo >> 24) & 0xFFFFFF; + v2 = (uint32_t)(lo >> 48) | (hi << 16); + + /* + * Sampled value is z, such that v0..v2 is lower than the first + * z elements of the table. 
+ */ + z = 0; + for (u = 0; u < (sizeof dist) / sizeof(dist[0]); u += 3) { + uint32_t w0, w1, w2, cc; + + w0 = dist[u + 2]; + w1 = dist[u + 1]; + w2 = dist[u + 0]; + cc = (v0 - w0) >> 31; + cc = (v1 - w1 - cc) >> 31; + cc = (v2 - w2 - cc) >> 31; + z += (int)cc; + } + return z; + +} + +/* + * Sample a bit with probability exp(-x) for some x >= 0. + */ +static int +BerExp(prng *p, fpr x, fpr ccs) { + int s, i; + fpr r; + uint32_t sw, w; + uint64_t z; + + /* + * Reduce x modulo log(2): x = s*log(2) + r, with s an integer, + * and 0 <= r < log(2). Since x >= 0, we can use fpr_trunc(). + */ + s = (int)fpr_trunc(fpr_mul(x, fpr_inv_log2)); + r = fpr_sub(x, fpr_mul(fpr_of(s), fpr_log2)); + + /* + * It may happen (quite rarely) that s >= 64; if sigma = 1.2 + * (the minimum value for sigma), r = 0 and b = 1, then we get + * s >= 64 if the half-Gaussian produced a z >= 13, which happens + * with probability about 0.000000000230383991, which is + * approximatively equal to 2^(-32). In any case, if s >= 64, + * then BerExp will be non-zero with probability less than + * 2^(-64), so we can simply saturate s at 63. + */ + sw = (uint32_t)s; + sw ^= (sw ^ 63) & -((63 - sw) >> 31); + s = (int)sw; + + /* + * Compute exp(-r); we know that 0 <= r < log(2) at this point, so + * we can use fpr_expm_p63(), which yields a result scaled to 2^63. + * We scale it up to 2^64, then right-shift it by s bits because + * we really want exp(-x) = 2^(-s)*exp(-r). + * + * The "-1" operation makes sure that the value fits on 64 bits + * (i.e. if r = 0, we may get 2^64, and we prefer 2^64-1 in that + * case). The bias is negligible since fpr_expm_p63() only computes + * with 51 bits of precision or so. + */ + z = ((fpr_expm_p63(r, ccs) << 1) - 1) >> s; + + /* + * Sample a bit with probability exp(-x). Since x = s*log(2) + r, + * exp(-x) = 2^-s * exp(-r), we compare lazily exp(-x) with the + * PRNG output to limit its consumption, the sign of the difference + * yields the expected result. 
+ */ + i = 64; + do { + i -= 8; + w = prng_get_u8(p) - ((uint32_t)(z >> i) & 0xFF); + } while (!w && i > 0); + return (int)(w >> 31); +} + +/* + * The sampler produces a random integer that follows a discrete Gaussian + * distribution, centered on mu, and with standard deviation sigma. The + * provided parameter isigma is equal to 1/sigma. + * + * The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between + * 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9. + */ +int +PQCLEAN_FALCON512_CLEAN_sampler(void *ctx, fpr mu, fpr isigma) { + sampler_context *spc; + int s, z0, z, b; + fpr r, dss, ccs, x; + + spc = ctx; + + /* + * Center is mu. We compute mu = s + r where s is an integer + * and 0 <= r < 1. + */ + s = (int)fpr_floor(mu); + r = fpr_sub(mu, fpr_of(s)); + + /* + * dss = 1/(2*sigma^2) = 0.5*(isigma^2). + */ + dss = fpr_half(fpr_sqr(isigma)); + + /* + * ccs = sigma_min / sigma = sigma_min * isigma. + */ + ccs = fpr_mul(isigma, spc->sigma_min); + + /* + * We now need to sample on center r. + */ + for (;;) { + /* + * Sample z for a Gaussian distribution. Then get a + * random bit b to turn the sampling into a bimodal + * distribution: if b = 1, we use z+1, otherwise we + * use -z. We thus have two situations: + * + * - b = 1: z >= 1 and sampled against a Gaussian + * centered on 1. + * - b = 0: z <= 0 and sampled against a Gaussian + * centered on 0. + */ + z0 = PQCLEAN_FALCON512_CLEAN_gaussian0_sampler(&spc->p); + b = (int)prng_get_u8(&spc->p) & 1; + z = b + ((b << 1) - 1) * z0; + + /* + * Rejection sampling. We want a Gaussian centered on r; + * but we sampled against a Gaussian centered on b (0 or + * 1). But we know that z is always in the range where + * our sampling distribution is greater than the Gaussian + * distribution, so rejection works. 
+ * + * We got z with distribution: + * G(z) = exp(-((z-b)^2)/(2*sigma0^2)) + * We target distribution: + * S(z) = exp(-((z-r)^2)/(2*sigma^2)) + * Rejection sampling works by keeping the value z with + * probability S(z)/G(z), and starting again otherwise. + * This requires S(z) <= G(z), which is the case here. + * Thus, we simply need to keep our z with probability: + * P = exp(-x) + * where: + * x = ((z-r)^2)/(2*sigma^2) - ((z-b)^2)/(2*sigma0^2) + * + * Here, we scale up the Bernoulli distribution, which + * makes rejection more probable, but makes rejection + * rate sufficiently decorrelated from the Gaussian + * center and standard deviation that the whole sampler + * can be said to be constant-time. + */ + x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z), r)), dss); + x = fpr_sub(x, fpr_mul(fpr_of(z0 * z0), fpr_inv_2sqrsigma0)); + if (BerExp(&spc->p, x, ccs)) { + /* + * Rejection sampling was centered on r, but the + * actual center is mu = s + r. + */ + return s + z; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng, + const fpr *expanded_key, + const uint16_t *hm, unsigned logn, uint8_t *tmp) { + fpr *ftmp; + + ftmp = (fpr *)tmp; + for (;;) { + /* + * Signature produces short vectors s1 and s2. The + * signature is acceptable only if the aggregate vector + * s1,s2 is short; we must use the same bound as the + * verifier. + * + * If the signature is acceptable, then we return only s2 + * (the verifier recomputes s1 from s2, the hashed message, + * and the public key). + */ + sampler_context spc; + samplerZ samp; + void *samp_ctx; + + /* + * Normal sampling. We use a fast PRNG seeded from our + * SHAKE context ('rng'). + */ + if (logn == 10) { + spc.sigma_min = fpr_sigma_min_10; + } else { + spc.sigma_min = fpr_sigma_min_9; + } + PQCLEAN_FALCON512_CLEAN_prng_init(&spc.p, rng); + samp = PQCLEAN_FALCON512_CLEAN_sampler; + samp_ctx = &spc; + + /* + * Do the actual signature. 
+ */ + if (do_sign_tree(samp, samp_ctx, sig, + expanded_key, hm, logn, ftmp)) { + break; + } + } +} + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng, + const int8_t *f, const int8_t *g, + const int8_t *F, const int8_t *G, + const uint16_t *hm, unsigned logn, uint8_t *tmp) { + fpr *ftmp; + + ftmp = (fpr *)tmp; + for (;;) { + /* + * Signature produces short vectors s1 and s2. The + * signature is acceptable only if the aggregate vector + * s1,s2 is short; we must use the same bound as the + * verifier. + * + * If the signature is acceptable, then we return only s2 + * (the verifier recomputes s1 from s2, the hashed message, + * and the public key). + */ + sampler_context spc; + samplerZ samp; + void *samp_ctx; + + /* + * Normal sampling. We use a fast PRNG seeded from our + * SHAKE context ('rng'). + */ + if (logn == 10) { + spc.sigma_min = fpr_sigma_min_10; + } else { + spc.sigma_min = fpr_sigma_min_9; + } + PQCLEAN_FALCON512_CLEAN_prng_init(&spc.p, rng); + samp = PQCLEAN_FALCON512_CLEAN_sampler; + samp_ctx = &spc; + + /* + * Do the actual signature. + */ + if (do_sign_dyn(samp, samp_ctx, sig, + f, g, F, G, hm, logn, ftmp)) { + break; + } + } +} diff --git a/crypto_sign/falcon/falcon-512/clean/vrfy.c b/crypto_sign/falcon/falcon-512/clean/vrfy.c new file mode 100644 index 00000000..cf89f69f --- /dev/null +++ b/crypto_sign/falcon/falcon-512/clean/vrfy.c @@ -0,0 +1,853 @@ +#include "inner.h" + +/* + * Falcon signature verification. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + + +/* ===================================================================== */ +/* + * Constants for NTT. + * + * n = 2^logn (2 <= n <= 1024) + * phi = X^n + 1 + * q = 12289 + * q0i = -1/q mod 2^16 + * R = 2^16 mod q + * R2 = 2^32 mod q + */ + +#define Q 12289 +#define Q0I 12287 +#define R 4091 +#define R2 10952 + +/* + * Table for NTT, binary case: + * GMb[x] = R*(g^rev(x)) mod q + * where g = 7 (it is a 2048-th primitive root of 1 modulo q) + * and rev() is the bit-reversal function over 10 bits. 
+ */ +static const uint16_t GMb[] = { + 4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, + 1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, + 1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, + 10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, + 12210, 6240, 997, 117, 4783, 4407, 1549, 7072, + 2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, + 12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, + 2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, + 5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, + 1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, + 3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, + 9277, 6130, 3323, 883, 10469, 489, 1502, 2851, + 11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, + 730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, + 9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, + 7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, + 3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, + 10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, + 6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, + 8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, + 6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, + 8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, + 5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, + 1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, + 737, 3698, 4699, 5753, 9046, 3687, 16, 914, + 5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, + 10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, + 6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, + 932, 10229, 8927, 7642, 351, 9298, 237, 5858, + 7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, + 4602, 1748, 11300, 340, 3711, 4614, 300, 10993, + 5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, + 3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, + 4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, + 6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, + 6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, + 8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, + 3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, + 7389, 8879, 10661, 11457, 4220, 
1432, 10832, 4328, + 8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, + 2523, 4339, 6115, 619, 937, 2834, 7775, 3279, + 2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, + 2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, + 12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, + 1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, + 11192, 315, 4511, 1158, 6061, 6751, 11865, 357, + 7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, + 4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, + 3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, + 5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, + 4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, + 7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, + 8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, + 6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, + 7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, + 10438, 9471, 1271, 408, 6911, 3079, 360, 8276, + 11535, 9156, 9049, 11539, 850, 8617, 784, 7919, + 8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, + 9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, + 6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, + 8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, + 8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, + 4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, + 2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, + 4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, + 11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, + 7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, + 11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, + 11736, 6813, 6979, 819, 8903, 6271, 10843, 348, + 7514, 8339, 6439, 694, 852, 5659, 2781, 3716, + 11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, + 2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, + 2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, + 10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, + 10923, 4918, 128, 7312, 725, 9157, 5006, 6393, + 3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, + 3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, + 5110, 45, 2400, 1921, 4377, 2720, 1695, 51, + 
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, + 4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, + 482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, + 11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, + 11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, + 8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, + 11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, + 862, 3158, 477, 7279, 5678, 7914, 4254, 302, + 2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, + 10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, + 5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, + 11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, + 1397, 10678, 103, 7420, 7976, 936, 764, 632, + 7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, + 6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, + 4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, + 7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, + 10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, + 2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, + 4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, + 8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, + 8192, 986, 7527, 1401, 870, 3615, 8465, 2756, + 9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, + 11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, + 2567, 708, 893, 6465, 4962, 10024, 2090, 5718, + 10743, 780, 4733, 4623, 2134, 2087, 4802, 884, + 5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, + 4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, + 6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, + 11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, + 8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, + 4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, + 2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, + 5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, + 11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, + 3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, + 7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, + 6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, + 10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, + 5371, 7274, 
11084, 290, 7864, 6827, 11822, 2509, + 6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, + 11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, + 7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, + 9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, + 7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, + 10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, + 162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, + 8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, + 5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, + 6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 +}; + +/* + * Table for inverse NTT, binary case: + * iGMb[x] = R*((1/g)^rev(x)) mod q + * Since g = 7, 1/g = 8778 mod 12289. + */ +static const uint16_t iGMb[] = { + 4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, + 2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, + 3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, + 5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, + 8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, + 4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, + 8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, + 5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, + 13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, + 9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, + 7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, + 11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, + 9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, + 6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, + 5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, + 4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, + 6635, 6543, 1582, 4868, 42, 673, 2240, 7219, + 1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, + 7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, + 6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, + 12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, + 6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, + 6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, + 11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, + 6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, + 9130, 9948, 
3566, 2133, 3901, 6038, 7333, 6609, + 3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, + 3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, + 5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, + 4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, + 10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, + 3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, + 5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, + 2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, + 3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, + 7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, + 10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, + 7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, + 6689, 386, 4462, 105, 2076, 10443, 119, 3955, + 4370, 11505, 3672, 11439, 750, 3240, 3133, 754, + 4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, + 4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, + 10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, + 728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, + 945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, + 4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, + 1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, + 101, 1911, 9483, 3608, 11997, 10536, 812, 8915, + 637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, + 3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, + 11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, + 2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, + 7769, 136, 617, 3157, 5889, 9219, 6855, 120, + 4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, + 11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, + 9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, + 6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, + 7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, + 10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, + 9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, + 4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, + 3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, + 3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, + 1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, + 2340, 7651, 1910, 10709, 2117, 9637, 
6402, 6028, + 2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, + 9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, + 6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, + 2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, + 5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, + 9956, 2702, 6656, 735, 2243, 11656, 833, 3107, + 6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, + 3513, 9769, 3025, 779, 9433, 3392, 7437, 668, + 10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, + 9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, + 5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, + 6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, + 7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, + 9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, + 707, 1088, 4936, 678, 10245, 18, 5684, 960, + 4459, 7957, 226, 2451, 6, 8874, 320, 6298, + 8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, + 9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, + 7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, + 5227, 952, 4319, 9810, 4356, 3088, 11118, 840, + 6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, + 4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, + 1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, + 11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, + 6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, + 2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, + 7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, + 9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, + 2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, + 12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, + 7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, + 9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, + 9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, + 439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, + 8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, + 10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, + 11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, + 11489, 8833, 2393, 15, 10830, 5003, 17, 565, + 5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, + 5840, 10482, 
8338, 6035, 6964, 1574, 10936, 2020, + 2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, + 11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, + 104, 6348, 9643, 6757, 12110, 5617, 10935, 541, + 135, 3041, 7200, 6526, 5085, 12136, 842, 4129, + 7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, + 1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, + 1770, 273, 8377, 2271, 5225, 10283, 116, 11807, + 91, 11699, 757, 1304, 7524, 6451, 8032, 8154, + 7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, + 12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, + 3924, 3188, 367, 2077, 336, 5384, 5631, 8596, + 4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, + 5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, + 12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, + 10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, + 9763, 12191, 459, 2966, 3166, 405, 5000, 9311, + 6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, + 8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, + 11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, + 9474, 2586, 1431, 2741, 473, 11383, 4745, 836, + 4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, + 5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, + 5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 +}; + +/* + * Reduce a small signed integer modulo q. The source integer MUST + * be between -q/2 and +q/2. + */ +static inline uint32_t +mq_conv_small(int x) { + /* + * If x < 0, the cast to uint32_t will set the high bit to 1. + */ + uint32_t y; + + y = (uint32_t)x; + y += Q & -(y >> 31); + return y; +} + +/* + * Addition modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_add(uint32_t x, uint32_t y) { + /* + * We compute x + y - q. If the result is negative, then the + * high bit will be set, and 'd >> 31' will be equal to 1; + * thus '-(d >> 31)' will be an all-one pattern. Otherwise, + * it will be an all-zero pattern. In other words, this + * implements a conditional addition of q. 
+ */ + uint32_t d; + + d = x + y - Q; + d += Q & -(d >> 31); + return d; +} + +/* + * Subtraction modulo q. Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_sub(uint32_t x, uint32_t y) { + /* + * As in mq_add(), we use a conditional addition to ensure the + * result is in the 0..q-1 range. + */ + uint32_t d; + + d = x - y; + d += Q & -(d >> 31); + return d; +} + +/* + * Division by 2 modulo q. Operand must be in the 0..q-1 range. + */ +static inline uint32_t +mq_rshift1(uint32_t x) { + x += Q & -(x & 1); + return (x >> 1); +} + +/* + * Montgomery multiplication modulo q. If we set R = 2^16 mod q, then + * this function computes: x * y / R mod q + * Operands must be in the 0..q-1 range. + */ +static inline uint32_t +mq_montymul(uint32_t x, uint32_t y) { + uint32_t z, w; + + /* + * We compute x*y + k*q with a value of k chosen so that the 16 + * low bits of the result are 0. We can then shift the value. + * After the shift, result may still be larger than q, but it + * will be lower than 2*q, so a conditional subtraction works. + */ + + z = x * y; + w = ((z * Q0I) & 0xFFFF) * Q; + + /* + * When adding z and w, the result will have its low 16 bits + * equal to 0. Since x, y and z are lower than q, the sum will + * be no more than (2^15 - 1) * q + (q - 1)^2, which will + * fit on 29 bits. + */ + z = (z + w) >> 16; + + /* + * After the shift, analysis shows that the value will be less + * than 2q. We do a subtraction then a conditional addition to + * ensure the result is in the expected range. + */ + z -= Q; + z += Q & -(z >> 31); + return z; +} + +/* + * Montgomery squaring (computes (x^2)/R). + */ +static inline uint32_t +mq_montysqr(uint32_t x) { + return mq_montymul(x, x); +} + +/* + * Divide x by y modulo q = 12289. + */ +static inline uint32_t +mq_div_12289(uint32_t x, uint32_t y) { + /* + * We invert y by computing y^(q-2) mod q. 
+ * + * We use the following addition chain for exponent e = 12287: + * + * e0 = 1 + * e1 = 2 * e0 = 2 + * e2 = e1 + e0 = 3 + * e3 = e2 + e1 = 5 + * e4 = 2 * e3 = 10 + * e5 = 2 * e4 = 20 + * e6 = 2 * e5 = 40 + * e7 = 2 * e6 = 80 + * e8 = 2 * e7 = 160 + * e9 = e8 + e2 = 163 + * e10 = e9 + e8 = 323 + * e11 = 2 * e10 = 646 + * e12 = 2 * e11 = 1292 + * e13 = e12 + e9 = 1455 + * e14 = 2 * e13 = 2910 + * e15 = 2 * e14 = 5820 + * e16 = e15 + e10 = 6143 + * e17 = 2 * e16 = 12286 + * e18 = e17 + e0 = 12287 + * + * Additions on exponents are converted to Montgomery + * multiplications. We define all intermediate results as so + * many local variables, and let the C compiler work out which + * must be kept around. + */ + uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; + uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; + + y0 = mq_montymul(y, R2); + y1 = mq_montysqr(y0); + y2 = mq_montymul(y1, y0); + y3 = mq_montymul(y2, y1); + y4 = mq_montysqr(y3); + y5 = mq_montysqr(y4); + y6 = mq_montysqr(y5); + y7 = mq_montysqr(y6); + y8 = mq_montysqr(y7); + y9 = mq_montymul(y8, y2); + y10 = mq_montymul(y9, y8); + y11 = mq_montysqr(y10); + y12 = mq_montysqr(y11); + y13 = mq_montymul(y12, y9); + y14 = mq_montysqr(y13); + y15 = mq_montysqr(y14); + y16 = mq_montymul(y15, y10); + y17 = mq_montysqr(y16); + y18 = mq_montymul(y17, y0); + + /* + * Final multiplication with x, which is not in Montgomery + * representation, computes the correct division result. + */ + return mq_montymul(y18, x); +} + +/* + * Compute NTT on a ring element. 
+ */ +static void +mq_NTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + + n = (size_t)1 << logn; + t = n; + for (m = 1; m < n; m <<= 1) { + size_t ht, i, j1; + + ht = t >> 1; + for (i = 0, j1 = 0; i < m; i ++, j1 += t) { + size_t j, j2; + uint32_t s; + + s = GMb[m + i]; + j2 = j1 + ht; + for (j = j1; j < j2; j ++) { + uint32_t u, v; + + u = a[j]; + v = mq_montymul(a[j + ht], s); + a[j] = (uint16_t)mq_add(u, v); + a[j + ht] = (uint16_t)mq_sub(u, v); + } + } + t = ht; + } +} + +/* + * Compute the inverse NTT on a ring element, binary case. + */ +static void +mq_iNTT(uint16_t *a, unsigned logn) { + size_t n, t, m; + uint32_t ni; + + n = (size_t)1 << logn; + t = 1; + m = n; + while (m > 1) { + size_t hm, dt, i, j1; + + hm = m >> 1; + dt = t << 1; + for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { + size_t j, j2; + uint32_t s; + + j2 = j1 + t; + s = iGMb[hm + i]; + for (j = j1; j < j2; j ++) { + uint32_t u, v, w; + + u = a[j]; + v = a[j + t]; + a[j] = (uint16_t)mq_add(u, v); + w = mq_sub(u, v); + a[j + t] = (uint16_t) + mq_montymul(w, s); + } + } + t = dt; + m = hm; + } + + /* + * To complete the inverse NTT, we must now divide all values by + * n (the vector size). We thus need the inverse of n, i.e. we + * need to divide 1 by 2 logn times. But we also want it in + * Montgomery representation, i.e. we also want to multiply it + * by R = 2^16. In the common case, this should be a simple right + * shift. The loop below is generic and works also in corner cases; + * its computation time is negligible. + */ + ni = R; + for (m = n; m > 1; m >>= 1) { + ni = mq_rshift1(ni); + } + for (m = 0; m < n; m ++) { + a[m] = (uint16_t)mq_montymul(a[m], ni); + } +} + +/* + * Convert a polynomial (mod q) to Montgomery representation. 
+ */ +static void +mq_poly_tomonty(uint16_t *f, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], R2); + } +} + +/* + * Multiply two polynomials together (NTT representation, and using + * a Montgomery multiplication). Result f*g is written over f. + */ +static void +mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_montymul(f[u], g[u]); + } +} + +/* + * Subtract polynomial g from polynomial f. + */ +static void +mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) { + size_t u, n; + + n = (size_t)1 << logn; + for (u = 0; u < n; u ++) { + f[u] = (uint16_t)mq_sub(f[u], g[u]); + } +} + +/* ===================================================================== */ + +/* see inner.h */ +void +PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) { + mq_NTT(h, logn); + mq_poly_tomonty(h, logn); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, + const uint16_t *h, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + + /* + * Reduce s2 elements modulo q ([0..q-1] range). + */ + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + + /* + * Compute -s1 = s2*h - c0 mod phi mod q (in tt[]). + */ + mq_NTT(tt, logn); + mq_poly_montymul_ntt(tt, h, logn); + mq_iNTT(tt, logn); + mq_poly_sub(tt, c0, logn); + + /* + * Normalize -s1 elements into the [-q/2..q/2] range. + */ + for (u = 0; u < n; u ++) { + int32_t w; + + w = (int32_t)tt[u]; + w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); + ((int16_t *)tt)[u] = (int16_t)w; + } + + /* + * Signature is valid if and only if the aggregate (-s1,s2) vector + * is short enough. 
+ */ + return PQCLEAN_FALCON512_CLEAN_is_short((int16_t *)tt, s2, logn); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h, + const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + tt[u] = (uint16_t)mq_conv_small(f[u]); + h[u] = (uint16_t)mq_conv_small(g[u]); + } + mq_NTT(h, logn); + mq_NTT(tt, logn); + for (u = 0; u < n; u ++) { + if (tt[u] == 0) { + return 0; + } + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + return 1; +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G, + const int8_t *f, const int8_t *g, const int8_t *F, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *t1, *t2; + + n = (size_t)1 << logn; + t1 = (uint16_t *)tmp; + t2 = t1 + n; + for (u = 0; u < n; u ++) { + t1[u] = (uint16_t)mq_conv_small(g[u]); + t2[u] = (uint16_t)mq_conv_small(F[u]); + } + mq_NTT(t1, logn); + mq_NTT(t2, logn); + mq_poly_tomonty(t1, logn); + mq_poly_montymul_ntt(t1, t2, logn); + for (u = 0; u < n; u ++) { + t2[u] = (uint16_t)mq_conv_small(f[u]); + } + mq_NTT(t2, logn); + for (u = 0; u < n; u ++) { + if (t2[u] == 0) { + return 0; + } + t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); + } + mq_iNTT(t1, logn); + for (u = 0; u < n; u ++) { + uint32_t w; + int32_t gi; + + w = t1[u]; + w -= (Q & ~ -((w - (Q >> 1)) >> 31)); + gi = *(int32_t *)&w; + if (gi < -127 || gi > +127) { + return 0; + } + G[u] = (int8_t)gi; + } + return 1; +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_is_invertible( + const int16_t *s2, unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + } + mq_NTT(tt, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + } + return (int)(1u - (r 
>> 31)); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_verify_recover(uint16_t *h, + const uint16_t *c0, const int16_t *s1, const int16_t *s2, + unsigned logn, uint8_t *tmp) { + size_t u, n; + uint16_t *tt; + uint32_t r; + + n = (size_t)1 << logn; + + /* + * Reduce elements of s1 and s2 modulo q; then write s2 into tt[] + * and c0 - s1 into h[]. + */ + tt = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u]; + w += Q & -(w >> 31); + tt[u] = (uint16_t)w; + + w = (uint32_t)s1[u]; + w += Q & -(w >> 31); + w = mq_sub(c0[u], w); + h[u] = (uint16_t)w; + } + + /* + * Compute h = (c0 - s1) / s2. If one of the coefficients of s2 + * is zero (in NTT representation) then the operation fails. We + * keep that information into a flag so that we do not deviate + * from strict constant-time processing; if all coefficients of + * s2 are non-zero, then the high bit of r will be zero. + */ + mq_NTT(tt, logn); + mq_NTT(h, logn); + r = 0; + for (u = 0; u < n; u ++) { + r |= (uint32_t)(tt[u] - 1); + h[u] = (uint16_t)mq_div_12289(h[u], tt[u]); + } + mq_iNTT(h, logn); + + /* + * Signature is acceptable if and only if it is short enough, + * and s2 was invertible mod phi mod q. The caller must still + * check that the rebuilt public key matches the expected + * value (e.g. through a hash). 
+ */ + r = ~r & (uint32_t) - PQCLEAN_FALCON512_CLEAN_is_short(s1, s2, logn); + return (int)(r >> 31); +} + +/* see inner.h */ +int +PQCLEAN_FALCON512_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) { + uint16_t *s2; + size_t u, n; + uint32_t r; + + n = (size_t)1 << logn; + s2 = (uint16_t *)tmp; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)sig[u]; + w += Q & -(w >> 31); + s2[u] = (uint16_t)w; + } + mq_NTT(s2, logn); + r = 0; + for (u = 0; u < n; u ++) { + uint32_t w; + + w = (uint32_t)s2[u] - 1u; + r += (w >> 31); + } + return (int)r; +} diff --git a/crypto_sign/rainbow/rainbowI-classic/META.yml b/crypto_sign/rainbow/rainbowI-classic/META.yml new file mode 100644 index 00000000..8b46fdb9 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/META.yml @@ -0,0 +1,20 @@ +name: "RAINBOW(16,36,32,32) - classic" +type: signature +claimed-nist-level: 1 +length-public-key: 161600 +length-secret-key: 103648 +length-signature: 66 +nistkat-sha256: 5cef855ed222382139f2fd91a84c3c651c5c4f8f59f5bb9cb3c8648b6ca34c52 +testvectors-sha256: 4896b97fee529f932396734f8bae1bd11ddf99d77586f7b96a7d87ada3a37ac2 +principal-submitters: + - Jintai Ding +auxiliary-submitters: + - Ming-Shing Chen + - Matthias Kannwischer + - Jacques Patarin + - Albrecht Petzoldt + - Dieter Schmidt + - Bo-Yin Yang +implementations: + - name: clean + version: https://github.com/fast-crypto-lab/rainbow-submission-round2/commit/173ada0e077e1b9dbd8e4a78994f87acc0c92263 diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/LICENSE b/crypto_sign/rainbow/rainbowI-classic/clean/LICENSE new file mode 100644 index 00000000..cb00a6e3 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/LICENSE @@ -0,0 +1,8 @@ +`Software implementation of Rainbow for NIST R2 submission' by Ming-Shing Chen + +To the extent possible under law, the person who associated CC0 with +`Software implementation of Rainbow for NIST R2 submission' has waived all copyright and related or neighboring rights 
+to `Software implementation of Rainbow for NIST R2 submission'. + +You should have received a copy of the CC0 legalcode along with this +work. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/Makefile.Microsoft_nmake b/crypto_sign/rainbow/rainbowI-classic/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..5091e3a1 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=librainbowI-classic_clean.lib +OBJECTS = blas_comm.obj parallel_matrix_op.obj rainbow.obj rainbow_keypair.obj rainbow_keypair_computation.obj sign.obj utils_hash.obj utils_prng.obj blas.obj gf.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/api.h b/crypto_sign/rainbow/rainbowI-classic/clean/api.h new file mode 100644 index 00000000..1b2bf5b1 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/api.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_RAINBOWICLASSIC_CLEAN_API_H +#define PQCLEAN_RAINBOWICLASSIC_CLEAN_API_H + +#include <stddef.h> +#include <stdint.h> + +#define PQCLEAN_RAINBOWICLASSIC_CLEAN_CRYPTO_SECRETKEYBYTES 103648 +#define PQCLEAN_RAINBOWICLASSIC_CLEAN_CRYPTO_PUBLICKEYBYTES 161600 +#define PQCLEAN_RAINBOWICLASSIC_CLEAN_CRYPTO_BYTES 66 +#define PQCLEAN_RAINBOWICLASSIC_CLEAN_CRYPTO_ALGNAME "RAINBOW(16,36,32,32) - classic" + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int 
PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, 
+ const uint8_t *m, size_t mlen, const uint8_t *pk); + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/blas.c b/crypto_sign/rainbow/rainbowI-classic/clean/blas.c new file mode 100644 index 00000000..72f13dd0 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/blas.c @@ -0,0 +1,43 @@ +#include "blas.h" +#include "gf.h" + +#include + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_predicated_add(uint8_t *accu_b, uint8_t predicate, const uint8_t *a, size_t _num_byte) { + uint8_t pr_u8 = (uint8_t) ((uint8_t) 0 - predicate); + for (size_t i = 0; i < _num_byte; i++) { + accu_b[i] ^= (a[i] & pr_u8); + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(uint8_t *accu_b, const uint8_t *a, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + accu_b[i] ^= a[i]; + } +} + + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_mul_scalar(uint8_t *a, uint8_t gf16_b, size_t _num_byte) { + uint8_t tmp; + for (size_t i = 0; i < _num_byte; i++) { + tmp = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a[i] & 0xF, gf16_b); + tmp |= (uint8_t) (PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a[i] >> 4, gf16_b) << 4); + a[i] = tmp; + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(uint8_t *accu_c, const uint8_t *a, uint8_t gf16_b, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + accu_c[i] ^= PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a[i] & 0xF, gf16_b); + accu_c[i] ^= (uint8_t) (PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a[i] >> 4, gf16_b) << 4); + } +} + +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_dot(const uint8_t *a, const uint8_t *b, size_t _num_byte) { + uint8_t r = 0; + for (size_t i = 0; i < _num_byte; i++) { + r ^= PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a[i], b[i]); + } + return r; +} + diff 
--git a/crypto_sign/rainbow/rainbowI-classic/clean/blas.h b/crypto_sign/rainbow/rainbowI-classic/clean/blas.h new file mode 100644 index 00000000..57201967 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/blas.h @@ -0,0 +1,20 @@ +#ifndef _BLAS_H_ +#define _BLAS_H_ +/// @file blas.h +/// @brief Functions for implementing basic linear algebra functions. +/// + +#include "rainbow_config.h" +#include +#include + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_predicated_add(uint8_t *accu_b, uint8_t predicate, const uint8_t *a, size_t _num_byte); +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(uint8_t *accu_b, const uint8_t *a, size_t _num_byte); + + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(uint8_t *accu_c, const uint8_t *a, uint8_t gf16_b, size_t _num_byte); +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_mul_scalar(uint8_t *a, uint8_t gf16_b, size_t _num_byte); +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_dot(const uint8_t *a, const uint8_t *b, size_t _num_byte); + + +#endif // _BLAS_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/blas_comm.c b/crypto_sign/rainbow/rainbowI-classic/clean/blas_comm.c new file mode 100644 index 00000000..931297b5 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/blas_comm.c @@ -0,0 +1,152 @@ +/// @file blas_comm.c +/// @brief The standard implementations for blas_comm.h +/// + +#include "blas_comm.h" +#include "blas.h" +#include "gf.h" +#include "rainbow_config.h" + +#include +#include + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(uint8_t *b, unsigned int _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + b[i] = 0; + } +} + +/// @brief get an element from GF(16) vector . +/// +/// @param[in] a - the input vector a. +/// @param[in] i - the index in the vector a. +/// @return the value of the element. 
+/// +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(const uint8_t *a, unsigned int i) { + uint8_t r = a[i >> 1]; + uint8_t r0 = r & 0xf; + uint8_t r1 = r >> 4; + uint8_t m = (uint8_t)(-((int8_t)i & 1)); + return (uint8_t)((r1 & m) | ((~m) & r0)); +} + +/// @brief set an element for a GF(16) vector . +/// +/// @param[in,out] a - the vector a. +/// @param[in] i - the index in the vector a. +/// @param[in] v - the value for the i-th element in vector a. +/// @return the value of the element. +/// +static uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_set_ele(uint8_t *a, unsigned int i, uint8_t v) { + uint8_t m = (uint8_t)(0xf ^ (-((int8_t)i & 1))); /// 1--> 0xf0 , 0--> 0x0f + uint8_t ai_remaining = (uint8_t)(a[i >> 1] & (~m)); /// erase + a[i >> 1] = (uint8_t)(ai_remaining | (m & (v << 4)) | (m & v & 0xf)); /// set + return v; +} + +static void gf16mat_prod_ref(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(c, n_A_vec_byte); + for (unsigned int i = 0; i < n_A_width; i++) { + uint8_t bb = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(b, i); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(c, matA, bb, n_A_vec_byte); + matA += n_A_vec_byte; + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_mul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int len_vec) { + unsigned int n_vec_byte = (len_vec + 1) / 2; + for (unsigned int k = 0; k < len_vec; k++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(c, n_vec_byte); + const uint8_t *bk = b + n_vec_byte * k; + for (unsigned int i = 0; i < len_vec; i++) { + uint8_t bb = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(bk, i); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(c, a + n_vec_byte * i, bb, n_vec_byte); + } + c += n_vec_byte; + } +} + +static unsigned int gf16mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsigned int w) { + unsigned int n_w_byte = (w + 1) / 2; + unsigned int r8 = 1; + for (unsigned int i = 0; i < h; 
i++) { + unsigned int offset_byte = i >> 1; + uint8_t *ai = mat + n_w_byte * i; + for (unsigned int j = i + 1; j < h; j++) { + uint8_t *aj = mat + n_w_byte * j; + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_predicated_add(ai + offset_byte, 1 ^ PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte); + } + uint8_t pivot = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(ai, i); + r8 &= PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_is_nonzero(pivot); + pivot = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_inv(pivot); + offset_byte = (i + 1) >> 1; + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_mul_scalar(ai + offset_byte, pivot, n_w_byte - offset_byte); + for (unsigned int j = 0; j < h; j++) { + if (i == j) { + continue; + } + uint8_t *aj = mat + n_w_byte * j; + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(aj + offset_byte, ai + offset_byte, PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(aj, i), n_w_byte - offset_byte); + } + } + return r8; +} + +static unsigned int gf16mat_solve_linear_eq_ref(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n) { + uint8_t mat[64 * 33]; + unsigned int n_byte = (n + 1) >> 1; + for (unsigned int i = 0; i < n; i++) { + memcpy(mat + i * (n_byte + 1), inp_mat + i * n_byte, n_byte); + mat[i * (n_byte + 1) + n_byte] = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(c_terms, i); + } + unsigned int r8 = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_gauss_elim(mat, n, n + 2); + for (unsigned int i = 0; i < n; i++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_set_ele(sol, i, mat[i * (n_byte + 1) + n_byte]); + } + return r8; +} + +static inline void gf16mat_submat(uint8_t *mat2, unsigned int w2, unsigned int st, const uint8_t *mat, unsigned int w, unsigned int h) { + unsigned int n_byte_w1 = (w + 1) / 2; + unsigned int n_byte_w2 = (w2 + 1) / 2; + unsigned int st_2 = st / 2; + for (unsigned int i = 0; i < h; i++) { + for (unsigned int j = 0; j < n_byte_w2; j++) { + mat2[i * n_byte_w2 + j] = mat[i * 
n_byte_w1 + st_2 + j]; + } + } +} + +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_inv(uint8_t *inv_a, const uint8_t *a, unsigned int H, uint8_t *buffer) { + unsigned int n_w_byte = (H + 1) / 2; + + uint8_t *aa = buffer; + for (unsigned int i = 0; i < H; i++) { + uint8_t *ai = aa + i * 2 * n_w_byte; + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(ai, 2 * n_w_byte); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(ai, a + i * n_w_byte, n_w_byte); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_set_ele(ai + n_w_byte, i, 1); + } + unsigned int r8 = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_gauss_elim(aa, H, 2 * H); + gf16mat_submat(inv_a, H, H, aa, 2 * H, H); + return r8; +} + +// choosing the implementations depends on the macros _BLAS_AVX2_ and _BLAS_SSE + +#define gf16mat_prod_impl gf16mat_prod_ref +#define gf16mat_gauss_elim_impl gf16mat_gauss_elim_ref +#define gf16mat_solve_linear_eq_impl gf16mat_solve_linear_eq_ref + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_prod(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b) { + gf16mat_prod_impl(c, matA, n_A_vec_byte, n_A_width, b); +} + +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_gauss_elim(uint8_t *mat, unsigned int h, unsigned int w) { + return gf16mat_gauss_elim_impl(mat, h, w); +} + +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_solve_linear_eq(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n) { + return gf16mat_solve_linear_eq_impl(sol, inp_mat, c_terms, n); +} + diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/blas_comm.h b/crypto_sign/rainbow/rainbowI-classic/clean/blas_comm.h new file mode 100644 index 00000000..4278b495 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/blas_comm.h @@ -0,0 +1,74 @@ +#ifndef _BLAS_COMM_H_ +#define _BLAS_COMM_H_ +/// @file blas_comm.h +/// @brief Common functions for linear algebra. +/// + +#include "rainbow_config.h" +#include + +/// @brief set a vector to 0. 
+/// +/// @param[in,out] b - the vector b. +/// @param[in] _num_byte - number of bytes for the vector b. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(uint8_t *b, unsigned int _num_byte); + +/// @brief get an element from GF(16) vector. +/// +/// @param[in] a - the input vector a. +/// @param[in] i - the index in the vector a. +/// @return the value of the element. +/// +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(const uint8_t *a, unsigned int i); + +/// @brief matrix-matrix multiplication: c = a * b , in GF(16) +/// +/// @param[out] c - the output matrix c +/// @param[in] a - a matrix a. +/// @param[in] b - a matrix b. +/// @param[in] len_vec - the length of column vectors. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_mul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int len_vec); + +/// @brief Gauss elimination for a matrix, in GF(16) +/// +/// @param[in,out] mat - the matrix. +/// @param[in] h - the height of the matrix. +/// @param[in] w - the width of the matrix. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_gauss_elim(uint8_t *mat, unsigned int h, unsigned int w); + +/// @brief Solving linear equations, in GF(16) +/// +/// @param[out] sol - the solutions. +/// @param[in] inp_mat - the matrix parts of input equations. +/// @param[in] c_terms - the constant terms of the input equations. +/// @param[in] n - the number of equations. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_solve_linear_eq(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n); + +/// @brief Computing the inverse matrix, in GF(16) +/// +/// @param[out] inv_a - the output of matrix a. +/// @param[in] a - a matrix a. +/// @param[in] H - height of matrix a, i.e., matrix a is an HxH matrix. +/// @param[in] buffer - The buffer for computations. 
it has to be as large as 2 input matrixes. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_inv(uint8_t *inv_a, const uint8_t *a, unsigned int H, uint8_t *buffer); + +/// @brief matrix-vector multiplication: c = matA * b , in GF(16) +/// +/// @param[out] c - the output vector c +/// @param[in] matA - a column-major matrix A. +/// @param[in] n_A_vec_byte - the size of column vectors in bytes. +/// @param[in] n_A_width - the width of matrix A. +/// @param[in] b - the vector b. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_prod(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b); + + +#endif // _BLAS_COMM_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/gf.c b/crypto_sign/rainbow/rainbowI-classic/clean/gf.c new file mode 100644 index 00000000..486e0fdc --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/gf.c @@ -0,0 +1,54 @@ +#include "gf.h" + +//// gf4 := gf2[x]/x^2+x+1 +static inline uint8_t gf4_mul_2(uint8_t a) { + uint8_t r = (uint8_t)(a << 1); + r ^= (uint8_t)((a >> 1) * 7); + return r; +} + +static inline uint8_t gf4_mul(uint8_t a, uint8_t b) { + uint8_t r = (uint8_t)(a * (b & 1)); + return r ^ (uint8_t)(gf4_mul_2(a) * (b >> 1)); +} + +static inline uint8_t gf4_squ(uint8_t a) { + return a ^ (a >> 1); +} + +//// gf16 := gf4[y]/y^2+y+x +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(uint8_t a, uint8_t b) { + uint8_t a0 = a & 3; + uint8_t a1 = (a >> 2); + uint8_t b0 = b & 3; + uint8_t b1 = (b >> 2); + uint8_t a0b0 = gf4_mul(a0, b0); + uint8_t a1b1 = gf4_mul(a1, b1); + uint8_t a0b1_a1b0 = gf4_mul(a0 ^ a1, b0 ^ b1) ^ a0b0 ^ a1b1; + uint8_t a1b1_x2 = gf4_mul_2(a1b1); + return (uint8_t)((a0b1_a1b0 ^ a1b1) << 2 ^ a0b0 ^ a1b1_x2); +} + +static inline uint8_t gf16_squ(uint8_t a) { + uint8_t a0 = a & 3; + uint8_t a1 = (a >> 2); + a1 = gf4_squ(a1); + uint8_t a1squ_x2 = gf4_mul_2(a1); + return (uint8_t)((a1 << 2) ^ a1squ_x2 ^ 
gf4_squ(a0)); +} + +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_is_nonzero(uint8_t a) { + unsigned int a4 = a & 0xf; + unsigned int r = ((unsigned int)0) - a4; + r >>= 4; + return r & 1; +} + +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_inv(uint8_t a) { + uint8_t a2 = gf16_squ(a); + uint8_t a4 = gf16_squ(a2); + uint8_t a8 = gf16_squ(a4); + uint8_t a6 = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a4, a2); + return PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(a8, a6); +} + diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/gf.h b/crypto_sign/rainbow/rainbowI-classic/clean/gf.h new file mode 100644 index 00000000..367e03cd --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/gf.h @@ -0,0 +1,18 @@ +#ifndef _GF16_H_ +#define _GF16_H_ + +#include "rainbow_config.h" +#include <stdint.h> + +/// @file gf.h +/// @brief Library for arithmetic in GF(16) +/// + +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_mul(uint8_t a, uint8_t b); + + +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_is_nonzero(uint8_t a); +uint8_t PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16_inv(uint8_t a); + + +#endif // _GF16_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/parallel_matrix_op.c b/crypto_sign/rainbow/rainbowI-classic/clean/parallel_matrix_op.c new file mode 100644 index 00000000..91624d6a --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/parallel_matrix_op.c @@ -0,0 +1,182 @@ +/// @file parallel_matrix_op.c +/// @brief the standard implementations for functions in parallel_matrix_op.h +/// +/// the standard implementations for functions in parallel_matrix_op.h +/// + +#include "parallel_matrix_op.h" +#include "blas.h" +#include "blas_comm.h" + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle(UT) matrix. +/// +/// @param[in] i_row - the i-th row in an upper-triangle matrix. +/// @param[in] j_col - the j-th column in an upper-triangle matrix. +/// @param[in] dim - the dimension of the upper-triangle matrix, i.e., an dim x dim matrix. 
+/// @return the corresponding index in an array storage. +/// +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(unsigned int i_row, unsigned int j_col, unsigned int dim) { + return (dim + dim - i_row + 1) * i_row / 2 + j_col - i_row; +} + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle or lower-triangle matrix. +/// +/// @param[in] i_row - the i-th row in a triangle matrix. +/// @param[in] j_col - the j-th column in a triangle matrix. +/// @param[in] n_var - the dimension of the triangle matrix, i.e., an n_var x n_var matrix. +/// @return the corresponding index in an array storage. +/// +static inline unsigned int idx_of_2trimat(unsigned int i_row, unsigned int j_col, unsigned int n_var) { + if (i_row > j_col) { + return PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(j_col, i_row, n_var); + } + return PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i_row, j_col, n_var); +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_UpperTrianglize(unsigned char *btriC, const unsigned char *bA, unsigned int Awidth, unsigned int size_batch) { + unsigned char *runningC = btriC; + unsigned int Aheight = Awidth; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < i; j++) { + unsigned int idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(j, i, Aheight); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(btriC + idx * size_batch, bA + size_batch * (i * Awidth + j), size_batch); + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(runningC, bA + size_batch * (i * Awidth + i), size_batch * (Aheight - i)); + runningC += size_batch * (Aheight - i); + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Awidth = Bheight; + unsigned int Aheight = Awidth; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for 
(unsigned int k = 0; k < Bheight; k++) { + if (k < i) { + continue; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(bC, &btriA[(k - i) * size_batch], PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + btriA += (Aheight - i) * size_batch; + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimatTr_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Aheight = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (i < k) { + continue; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(bC, &btriA[size_batch * (PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(k, i, Aheight))], PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_2trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Aheight = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (i == k) { + continue; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(bC, &btriA[size_batch * (idx_of_2trimat(i, k, Aheight))], PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_matTr_madd_gf16(unsigned char *bC, const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Atr_height = Awidth; + unsigned int Atr_width = Aheight; + for (unsigned int i = 
0; i < Atr_height; i++) { + for (unsigned int j = 0; j < Atr_width; j++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(bC, &bB[j * Bwidth * size_batch], PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(&A_to_tr[size_Acolvec * i], j), size_batch * Bwidth); + } + bC += size_batch * Bwidth; + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_bmatTr_madd_gf16(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + const unsigned char *bA = bA_to_tr; + unsigned int Aheight = Awidth_before_tr; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(bC, &bA[size_batch * (i + k * Aheight)], PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_mat_madd_gf16(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Awidth = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(bC, &bA[k * size_batch], PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + bA += (Awidth) * size_batch; + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_recmat_eval_gf16(unsigned char *z, const unsigned char *y, unsigned int dim_y, const unsigned char *mat, + const unsigned char *x, unsigned int dim_x, unsigned int size_batch) { + unsigned char tmp[128]; + + unsigned char _x[128]; + for (unsigned int i = 0; i < dim_x; i++) { + _x[i] = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(x, 
i); + } + unsigned char _y[128]; + for (unsigned int i = 0; i < dim_y; i++) { + _y[i] = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(y, i); + } + + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(z, size_batch); + for (unsigned int i = 0; i < dim_y; i++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(tmp, size_batch); + for (unsigned int j = 0; j < dim_x; j++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(tmp, mat, _x[j], size_batch); + mat += size_batch; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(z, tmp, _y[i], size_batch); + } +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_trimat_eval_gf16(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch) { + unsigned char tmp[256]; + + unsigned char _x[256]; + for (unsigned int i = 0; i < dim; i++) { + _x[i] = PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele(x, i); + } + + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(y, size_batch); + for (unsigned int i = 0; i < dim; i++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(tmp, size_batch); + for (unsigned int j = i; j < dim; j++) { + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(tmp, trimat, _x[j], size_batch); + trimat += size_batch; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd(y, tmp, _x[i], size_batch); + } +} diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/parallel_matrix_op.h b/crypto_sign/rainbow/rainbowI-classic/clean/parallel_matrix_op.h new file mode 100644 index 00000000..52f596e5 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/parallel_matrix_op.h @@ -0,0 +1,260 @@ +#ifndef _P_MATRIX_OP_H_ +#define _P_MATRIX_OP_H_ +/// @file parallel_matrix_op.h +/// @brief Librarys for operations of batched matrixes. +/// +/// + +//////////////// Section: triangle matrix <-> rectangle matrix /////////////////////////////////// + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle(UT) matrix. +/// +/// @param[in] i_row - the i-th row in an upper-triangle matrix. 
+/// @param[in] j_col - the j-th column in an upper-triangle matrix. +/// @param[in] dim - the dimension of the upper-triangle matrix, i.e., an dim x dim matrix. +/// @return the corresponding index in an array storage. +/// +unsigned int PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(unsigned int i_row, unsigned int j_col, unsigned int dim); + +/// +/// @brief Upper trianglize a rectangle matrix to the corresponding upper-triangle matrix. +/// +/// @param[out] btriC - the batched upper-trianglized matrix C. +/// @param[in] bA - a batched rectangle matrix A. +/// @param[in] Awidth - the width of the batched matrix A, i.e., A is an Awidth x Awidth matrix. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_UpperTrianglize(unsigned char *btriC, const unsigned char *bA, unsigned int Awidth, unsigned int size_batch); + +//////////////////// Section: matrix multiplications /////////////////////////////// + +/// +/// @brief bC += btriA * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. 
+/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA^Tr * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. A will be transposed while multiplying. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimatTr_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA^Tr * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A, which will be transposed while multiplying. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimatTr_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += (btriA + btriA^Tr) *B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. 
The operand for multiplication is (btriA + btriA^Tr). +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_2trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += (btriA + btriA^Tr) *B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. The operand for multiplication is (btriA + btriA^Tr). +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_2trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += A^Tr * bB , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] A_to_tr - a column-major matrix A. The operand for multiplication is A^Tr. +/// @param[in] Aheight - the height of A. +/// @param[in] size_Acolvec - the size of a column vector in A. +/// @param[in] Awidth - the width of A. +/// @param[in] bB - a batched matrix B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_matTr_madd_gf16(unsigned char *bC, + const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += A^Tr * bB , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] A_to_tr - a column-major matrix A. The operand for multiplication is A^Tr. +/// @param[in] Aheight - the height of A. +/// @param[in] size_Acolvec - the size of a column vector in A. +/// @param[in] Awidth - the width of A. +/// @param[in] bB - a batched matrix B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_matTr_madd_gf256(unsigned char *bC, + const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA^Tr * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA_to_tr - a batched matrix A. The operand for multiplication is (bA^Tr). +/// @param[in] Awidth_before_tr - the width of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_bmatTr_madd_gf16(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA^Tr * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA_to_tr - a batched matrix A. 
The operand for multiplication is (bA^Tr). +/// @param[in] Awidth_before_tr - the width of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_bmatTr_madd_gf256(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA - a batched matrix A. +/// @param[in] Aheight - the height of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_mat_madd_gf16(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA - a batched matrix A. +/// @param[in] Aheight - the height of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_mat_madd_gf256(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +//////////////////// Section: "quadratic" matrix evaluation /////////////////////////////// + +/// +/// @brief y = x^Tr * trimat * x , in GF(16) +/// +/// @param[out] y - the returned batched element y. +/// @param[in] trimat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim - the dimension of matrix trimat (and x). +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_trimat_eval_gf16(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch); + +/// +/// @brief y = x^Tr * trimat * x , in GF(256) +/// +/// @param[out] y - the returned batched element y. +/// @param[in] trimat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim - the dimension of matrix trimat (and x). +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_trimat_eval_gf256(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch); + +/// +/// @brief z = y^Tr * mat * x , in GF(16) +/// +/// @param[out] z - the returned batched element z. +/// @param[in] y - an input vector y. +/// @param[in] dim_y - the length of y. +/// @param[in] mat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim_x - the length of x. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix.
+/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_recmat_eval_gf16(unsigned char *z, const unsigned char *y, unsigned int dim_y, + const unsigned char *mat, const unsigned char *x, unsigned int dim_x, unsigned int size_batch); + +/// +/// @brief z = y^Tr * mat * x , in GF(256) +/// +/// @param[out] z - the returned batched element z. +/// @param[in] y - an input vector y. +/// @param[in] dim_y - the length of y. +/// @param[in] mat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim_x - the length of x. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_recmat_eval_gf256(unsigned char *z, const unsigned char *y, unsigned int dim_y, + const unsigned char *mat, const unsigned char *x, unsigned int dim_x, unsigned int size_batch); + +#endif // _P_MATRIX_OP_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow.c b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow.c new file mode 100644 index 00000000..05add91c --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow.c @@ -0,0 +1,168 @@ +/// @file rainbow.c +/// @brief The standard implementations for functions in rainbow.h +/// + +#include "blas.h" +#include "parallel_matrix_op.h" +#include "rainbow.h" +#include "rainbow_blas.h" +#include "rainbow_config.h" +#include "rainbow_keypair.h" +#include "utils_hash.h" +#include "utils_prng.h" +#include +#include +#include + +#define MAX_ATTEMPT_FRMAT 128 + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_sign(uint8_t *signature, const sk_t *sk, const uint8_t *_digest) { + uint8_t mat_l1[_O1 * _O1_BYTE]; + uint8_t mat_l2[_O2 * _O2_BYTE]; + uint8_t mat_buffer[2 * _MAX_O * _MAX_O_BYTE]; + + // setup PRNG + prng_t prng_sign; + uint8_t prng_preseed[LEN_SKSEED + _HASH_LEN]; + memcpy(prng_preseed, sk->sk_seed, LEN_SKSEED); + memcpy(prng_preseed + LEN_SKSEED, _digest, _HASH_LEN); // prng_preseed = sk_seed || digest + uint8_t 
prng_seed[_HASH_LEN]; + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(prng_seed, _HASH_LEN, prng_preseed, _HASH_LEN + LEN_SKSEED); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_set(&prng_sign, prng_seed, _HASH_LEN); // seed = H( sk_seed || digest ) + for (unsigned int i = 0; i < LEN_SKSEED + _HASH_LEN; i++) { + prng_preseed[i] ^= prng_preseed[i]; // clean + } + for (unsigned int i = 0; i < _HASH_LEN; i++) { + prng_seed[i] ^= prng_seed[i]; // clean + } + + // roll vinegars. + uint8_t vinegar[_V1_BYTE]; + unsigned int n_attempt = 0; + unsigned int l1_succ = 0; + while (!l1_succ) { + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + break; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen(&prng_sign, vinegar, _V1_BYTE); // generating vinegars + gfmat_prod(mat_l1, sk->l1_F2, _O1 * _O1_BYTE, _V1, vinegar); // generating the linear equations for layer 1 + l1_succ = gfmat_inv(mat_l1, mat_l1, _O1, mat_buffer); // check if the linear equation solvable + n_attempt++; + } + + // Given the vinegars, pre-compute variables needed for layer 2 + uint8_t r_l1_F1[_O1_BYTE] = {0}; + uint8_t r_l2_F1[_O2_BYTE] = {0}; + batch_quad_trimat_eval(r_l1_F1, sk->l1_F1, vinegar, _V1, _O1_BYTE); + batch_quad_trimat_eval(r_l2_F1, sk->l2_F1, vinegar, _V1, _O2_BYTE); + uint8_t mat_l2_F3[_O2 * _O2_BYTE]; + uint8_t mat_l2_F2[_O1 * _O2_BYTE]; + gfmat_prod(mat_l2_F3, sk->l2_F3, _O2 * _O2_BYTE, _V1, vinegar); + gfmat_prod(mat_l2_F2, sk->l2_F2, _O1 * _O2_BYTE, _V1, vinegar); + + // Some local variables. 
+ uint8_t _z[_PUB_M_BYTE]; + uint8_t y[_PUB_M_BYTE]; + uint8_t *x_v1 = vinegar; + uint8_t x_o1[_O1_BYTE]; + uint8_t x_o2[_O2_BYTE]; + + uint8_t digest_salt[_HASH_LEN + _SALT_BYTE]; + memcpy(digest_salt, _digest, _HASH_LEN); + uint8_t *salt = digest_salt + _HASH_LEN; + + uint8_t temp_o[_MAX_O_BYTE + 32] = {0}; + unsigned int succ = 0; + while (!succ) { + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + break; + } + // The computation: H(digest||salt) --> z --S--> y --C-map--> x --T--> w + + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen(&prng_sign, salt, _SALT_BYTE); // roll the salt + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(_z, _PUB_M_BYTE, digest_salt, _HASH_LEN + _SALT_BYTE); // H(digest||salt) + + // y = S^-1 * z + memcpy(y, _z, _PUB_M_BYTE); // identity part of S + gfmat_prod(temp_o, sk->s1, _O1_BYTE, _O2, _z + _O1_BYTE); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(y, temp_o, _O1_BYTE); + + // Central Map: + // layer 1: calculate x_o1 + memcpy(temp_o, r_l1_F1, _O1_BYTE); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(temp_o, y, _O1_BYTE); + gfmat_prod(x_o1, mat_l1, _O1_BYTE, _O1, temp_o); + + // layer 2: calculate x_o2 + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_set_zero(temp_o, _O2_BYTE); + gfmat_prod(temp_o, mat_l2_F2, _O2_BYTE, _O1, x_o1); // F2 + batch_quad_trimat_eval(mat_l2, sk->l2_F5, x_o1, _O1, _O2_BYTE); // F5 + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(temp_o, mat_l2, _O2_BYTE); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(temp_o, r_l2_F1, _O2_BYTE); // F1 + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(temp_o, y + _O1_BYTE, _O2_BYTE); + + // generate the linear equations of the 2nd layer + gfmat_prod(mat_l2, sk->l2_F6, _O2 * _O2_BYTE, _O1, x_o1); // F6 + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(mat_l2, mat_l2_F3, _O2 * _O2_BYTE); // F3 + succ = gfmat_inv(mat_l2, mat_l2, _O2, mat_buffer); + gfmat_prod(x_o2, mat_l2, _O2_BYTE, _O2, temp_o); // solve l2 eqs + + n_attempt++; + }; + // w = T^-1 * x + uint8_t w[_PUB_N_BYTE]; + // identity part of T.
+ memcpy(w, x_v1, _V1_BYTE); + memcpy(w + _V1_BYTE, x_o1, _O1_BYTE); + memcpy(w + _V2_BYTE, x_o2, _O2_BYTE); + // Computing the t1 part. + gfmat_prod(y, sk->t1, _V1_BYTE, _O1, x_o1); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(w, y, _V1_BYTE); + // Computing the t4 part. + gfmat_prod(y, sk->t4, _V1_BYTE, _O2, x_o2); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(w, y, _V1_BYTE); + // Computing the t3 part. + gfmat_prod(y, sk->t3, _O1_BYTE, _O2, x_o2); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(w + _V1_BYTE, y, _O1_BYTE); + + memset(signature, 0, _SIGNATURE_BYTE); // zero the output buffer + // clean + memset(&prng_sign, 0, sizeof(prng_t)); + memset(vinegar, 0, _V1_BYTE); + memset(r_l1_F1, 0, _O1_BYTE); + memset(r_l2_F1, 0, _O2_BYTE); + memset(_z, 0, _PUB_M_BYTE); + memset(y, 0, _PUB_M_BYTE); + memset(x_o1, 0, _O1_BYTE); + memset(x_o2, 0, _O2_BYTE); + memset(temp_o, 0, sizeof(temp_o)); + + // return: copy w and salt to the signature. + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + return -1; + } + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(signature, w, _PUB_N_BYTE); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(signature + _PUB_N_BYTE, salt, _SALT_BYTE); + return 0; +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_verify(const uint8_t *digest, const uint8_t *signature, const pk_t *pk) { + unsigned char digest_ck[_PUB_M_BYTE]; + // public_map( digest_ck , pk , signature ); Evaluating the quadratic public polynomials. + batch_quad_trimat_eval(digest_ck, pk->pk, signature, _PUB_N, _PUB_M_BYTE); + + unsigned char correct[_PUB_M_BYTE]; + unsigned char digest_salt[_HASH_LEN + _SALT_BYTE]; + memcpy(digest_salt, digest, _HASH_LEN); + memcpy(digest_salt + _HASH_LEN, signature + _PUB_N_BYTE, _SALT_BYTE); + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(correct, _PUB_M_BYTE, digest_salt, _HASH_LEN + _SALT_BYTE); // H( digest || salt ) + + // check consistency: OR together every byte difference, no early exit. + unsigned char cc = 0; + for (unsigned int i = 0; i < _PUB_M_BYTE; i++) { + cc |= (digest_ck[i] ^ correct[i]); + } + return (0 == cc) ?
0 : -1; +} + + diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow.h b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow.h new file mode 100644 index 00000000..cf7f2b4c --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow.h @@ -0,0 +1,33 @@ +#ifndef _RAINBOW_H_ +#define _RAINBOW_H_ +/// @file rainbow.h +/// @brief APIs for rainbow. +/// + +#include "rainbow_config.h" +#include "rainbow_keypair.h" + +#include + +/// +/// @brief Signing function for classical secret key. +/// +/// @param[out] signature - the signature. +/// @param[in] sk - the secret key. +/// @param[in] digest - the digest. +/// +int PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_sign(uint8_t *signature, const sk_t *sk, const uint8_t *digest); + +/// +/// @brief Verifying function. +/// +/// @param[in] digest - the digest. +/// @param[in] signature - the signature. +/// @param[in] pk - the public key. +/// @return 0 if the signature verifies, -1 otherwise. +/// +int PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_verify(const uint8_t *digest, const uint8_t *signature, const pk_t *pk); + + + +#endif // _RAINBOW_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_blas.h b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_blas.h new file mode 100644 index 00000000..99fc5be4 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_blas.h @@ -0,0 +1,32 @@ +#ifndef _RAINBOW_BLAS_H_ +#define _RAINBOW_BLAS_H_ +/// @file rainbow_blas.h +/// @brief Defining the functions used in rainbow.c according to the definitions in rainbow_config.h +/// +/// Defining the functions used in rainbow.c according to the definitions in rainbow_config.h + +#include "blas.h" +#include "blas_comm.h" +#include "parallel_matrix_op.h" +#include "rainbow_config.h" + + +#define gfv_get_ele PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_get_ele +#define gfv_mul_scalar PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_mul_scalar +#define gfv_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16v_madd + +#define gfmat_prod
PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_prod +#define gfmat_inv PQCLEAN_RAINBOWICLASSIC_CLEAN_gf16mat_inv + +#define batch_trimat_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimat_madd_gf16 +#define batch_trimatTr_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_trimatTr_madd_gf16 +#define batch_2trimat_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_2trimat_madd_gf16 +#define batch_matTr_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_matTr_madd_gf16 +#define batch_bmatTr_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_bmatTr_madd_gf16 +#define batch_mat_madd PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_mat_madd_gf16 + +#define batch_quad_trimat_eval PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_trimat_eval_gf16 +#define batch_quad_recmat_eval PQCLEAN_RAINBOWICLASSIC_CLEAN_batch_quad_recmat_eval_gf16 + + +#endif // _RAINBOW_BLAS_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_config.h b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_config.h new file mode 100644 index 00000000..a581af34 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_config.h @@ -0,0 +1,49 @@ +#ifndef _H_RAINBOW_CONFIG_H_ +#define _H_RAINBOW_CONFIG_H_ + +/// @file rainbow_config.h +/// @brief Defining the parameters of the Rainbow and the corresponding constants. +/// + +#define _USE_GF16 +#define _GFSIZE 16 +#define _V1 36 +#define _O1 32 +#define _O2 32 +#define _MAX_O 32 +#define _HASH_LEN 32 + + +#define _V2 ((_V1) + (_O1)) + +/// size of N, in # of gf elements. +#define _PUB_N (_V1 + _O1 + _O2) + +/// size of M, in # gf elements. +#define _PUB_M (_O1 + _O2) + +/// size of variables, in # bytes. 
+ +// GF16 +#define _V1_BYTE (_V1 / 2) +#define _V2_BYTE (_V2 / 2) +#define _O1_BYTE (_O1 / 2) +#define _O2_BYTE (_O2 / 2) +#define _MAX_O_BYTE (_MAX_O / 2) +#define _PUB_N_BYTE (_PUB_N / 2) +#define _PUB_M_BYTE (_PUB_M / 2) + + +/// length of seed for public key, in # bytes +#define LEN_PKSEED 32 + +/// length of seed for secret key, in # bytes +#define LEN_SKSEED 32 + +/// length of salt for a signature, in # bytes +#define _SALT_BYTE 16 + +/// length of a signature +#define _SIGNATURE_BYTE (_PUB_N_BYTE + _SALT_BYTE) + +#endif // _H_RAINBOW_CONFIG_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair.c b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair.c new file mode 100644 index 00000000..39d059e6 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair.c @@ -0,0 +1,155 @@ +/// @file rainbow_keypair.c +/// @brief implementations of functions in rainbow_keypair.h +/// + +#include "rainbow_keypair.h" +#include "blas.h" +#include "blas_comm.h" +#include "rainbow_blas.h" +#include "rainbow_keypair_computation.h" +#include "utils_prng.h" +#include +#include +#include + +static +void generate_S_T( unsigned char *s_and_t, prng_t *prng0 ) { + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->s1); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t1); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t4); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t3); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); +} + + +static +unsigned generate_l1_F12( unsigned char *sk, prng_t *prng0 ) { + unsigned n_byte_generated = 0; + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->l1_F1); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l1_F2); + 
PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + n_byte_generated += size; + + return n_byte_generated; +} + + +static +unsigned generate_l2_F12356( unsigned char *sk, prng_t *prng0 ) { + unsigned n_byte_generated = 0; + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->l2_F1); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F2); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F3); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F5); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F6); + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + n_byte_generated += size; + + return n_byte_generated; +} + + +static void generate_B1_B2(unsigned char *sk, prng_t *prng0) { + sk += generate_l1_F12(sk, prng0); + generate_l2_F12356(sk, prng0); +} + +static void calculate_t4(unsigned char *t2_to_t4, const unsigned char *t1, const unsigned char *t3) { + // t4 = T_sk.t1 * T_sk.t3 - T_sk.t2 + unsigned char temp[_V1_BYTE + 32]; + unsigned char *t4 = t2_to_t4; + for (unsigned int i = 0; i < _O2; i++) { /// t3 width + gfmat_prod(temp, t1, _V1_BYTE, _O1, t3); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(t4, temp, _V1_BYTE); + t4 += _V1_BYTE; + t3 += _O1_BYTE; + } +} + +static void obsfucate_l1_polys(unsigned char *l1_polys, const unsigned char *l2_polys, unsigned int n_terms, const unsigned char *s1) { + unsigned char temp[_O1_BYTE + 32]; + while (n_terms--) { + gfmat_prod(temp, s1, _O1_BYTE, _O2, l2_polys); + PQCLEAN_RAINBOWICLASSIC_CLEAN_gf256v_add(l1_polys, temp, _O1_BYTE); + l1_polys += _O1_BYTE; + l2_polys += _O2_BYTE; + } +} + +/////////////////// Classic ////////////////////////////////// + +static void generate_secretkey(sk_t *sk, const 
unsigned char *sk_seed) { + memcpy(sk->sk_seed, sk_seed, LEN_SKSEED); + + // set up prng + prng_t prng0; + PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_set(&prng0, sk_seed, LEN_SKSEED); + + // generating secret key with prng. + generate_S_T(sk->s1, &prng0); + generate_B1_B2(sk->l1_F1, &prng0); + + // clean prng + memset(&prng0, 0, sizeof(prng_t)); +} + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_generate_keypair(pk_t *rpk, sk_t *sk, const unsigned char *sk_seed) { + generate_secretkey(sk, sk_seed); + + // set up a temporary structure ext_cpk_t for calculating public key. + ext_cpk_t pk; + + PQCLEAN_RAINBOWICLASSIC_CLEAN_calculate_Q_from_F(&pk, sk, sk); // compute the public key in ext_cpk_t format. + calculate_t4(sk->t4, sk->t1, sk->t3); + + obsfucate_l1_polys(pk.l1_Q1, pk.l2_Q1, N_TRIANGLE_TERMS(_V1), sk->s1); + obsfucate_l1_polys(pk.l1_Q2, pk.l2_Q2, _V1 * _O1, sk->s1); + obsfucate_l1_polys(pk.l1_Q3, pk.l2_Q3, _V1 * _O2, sk->s1); + obsfucate_l1_polys(pk.l1_Q5, pk.l2_Q5, N_TRIANGLE_TERMS(_O1), sk->s1); + obsfucate_l1_polys(pk.l1_Q6, pk.l2_Q6, _O1 * _O2, sk->s1); + obsfucate_l1_polys(pk.l1_Q9, pk.l2_Q9, N_TRIANGLE_TERMS(_O2), sk->s1); + // so far, the pk contains the full pk but in ext_cpk_t format. + + PQCLEAN_RAINBOWICLASSIC_CLEAN_extcpk_to_pk(rpk, &pk); // convert the public key from ext_cpk_t to pk_t. +} + + + diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair.h b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair.h new file mode 100644 index 00000000..a47366f5 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair.h @@ -0,0 +1,61 @@ +#ifndef _RAINBOW_KEYPAIR_H_ +#define _RAINBOW_KEYPAIR_H_ +/// @file rainbow_keypair.h +/// @brief Formats of key pairs and functions for generating key pairs. +/// Formats of key pairs and functions for generating key pairs. 
+/// + +#include "rainbow_config.h" + +#define N_TRIANGLE_TERMS(n_var) ((n_var) * ((n_var) + 1) / 2) + +/// @brief public key for classic rainbow +/// +/// public key for classic rainbow +/// +typedef struct rainbow_publickey { + unsigned char pk[(_PUB_M_BYTE)*N_TRIANGLE_TERMS(_PUB_N)]; +} pk_t; + +/// @brief secret key for classic rainbow +/// +/// secret key for classic rainbow +/// +typedef struct rainbow_secretkey { + /// + /// seed for generating secret key. + /// Generating S, T, and F for classic rainbow. + /// Generating S and T only for cyclic rainbow. + unsigned char sk_seed[LEN_SKSEED]; + + unsigned char s1[_O1_BYTE * _O2]; ///< part of S map + unsigned char t1[_V1_BYTE * _O1]; ///< part of T map + unsigned char t4[_V1_BYTE * _O2]; ///< part of T map + unsigned char t3[_O1_BYTE * _O2]; ///< part of T map + + unsigned char l1_F1[_O1_BYTE * N_TRIANGLE_TERMS(_V1)]; ///< part of C-map, F1, Layer1 + unsigned char l1_F2[_O1_BYTE * _V1 * _O1]; ///< part of C-map, F2, Layer1 + + unsigned char l2_F1[_O2_BYTE * N_TRIANGLE_TERMS(_V1)]; ///< part of C-map, F1, Layer2 + unsigned char l2_F2[_O2_BYTE * _V1 * _O1]; ///< part of C-map, F2, Layer2 + + unsigned char l2_F3[_O2_BYTE * _V1 * _O2]; ///< part of C-map, F3, Layer2 + unsigned char l2_F5[_O2_BYTE * N_TRIANGLE_TERMS(_O1)]; ///< part of C-map, F5, Layer2 + unsigned char l2_F6[_O2_BYTE * _O1 * _O2]; ///< part of C-map, F6, Layer2 +} sk_t; + + +/// +/// @brief Generate key pairs for classic rainbow. +/// +/// @param[out] pk - the public key. +/// @param[out] sk - the secret key. +/// @param[in] sk_seed - seed for generating the secret key. 
+/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_generate_keypair(pk_t *pk, sk_t *sk, const unsigned char *sk_seed); + + + + + +#endif // _RAINBOW_KEYPAIR_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair_computation.c b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair_computation.c new file mode 100644 index 00000000..0eb7290c --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair_computation.c @@ -0,0 +1,189 @@ +/// @file rainbow_keypair_computation.c +/// @brief Implementations for functions in rainbow_keypair_computation.h +/// + +#include "rainbow_keypair_computation.h" +#include "blas.h" +#include "blas_comm.h" +#include "rainbow_blas.h" +#include "rainbow_keypair.h" +#include +#include +#include + +void PQCLEAN_RAINBOWICLASSIC_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk) { + const unsigned char *idx_l1 = cpk->l1_Q1; + const unsigned char *idx_l2 = cpk->l2_Q1; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = i; j < _V1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q2; + idx_l2 = cpk->l2_Q2; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = _V1; j < _V1 + _O1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q3; + idx_l2 = cpk->l2_Q3; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + 
memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q5; + idx_l2 = cpk->l2_Q5; + for (unsigned int i = _V1; i < _V1 + _O1; i++) { + for (unsigned int j = i; j < _V1 + _O1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q6; + idx_l2 = cpk->l2_Q6; + for (unsigned int i = _V1; i < _V1 + _O1; i++) { + for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q9; + idx_l2 = cpk->l2_Q9; + for (unsigned int i = _V1 + _O1; i < _PUB_N; i++) { + for (unsigned int j = i; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } +} + +static void calculate_Q_from_F_ref(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) { + /* + Layer 1 + Computing : + Q_pk.l1_F1s[i] = F_sk.l1_F1s[i] + + Q_pk.l1_F2s[i] = (F1* T1 + F2) + F1tr * t1 + Q_pk.l1_F5s[i] = UT( T1tr* (F1 * T1 + F2) ) + */ + const unsigned char *t2 = Ts->t4; + + memcpy(Qs->l1_Q1, Fs->l1_F1, _O1_BYTE * N_TRIANGLE_TERMS(_V1)); + + memcpy(Qs->l1_Q2, Fs->l1_F2, _O1_BYTE * _V1 * _O1); + batch_trimat_madd(Qs->l1_Q2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); // F1*T1 + F2 + + memset(Qs->l1_Q3, 0, _O1_BYTE * _V1 * _O2); + memset(Qs->l1_Q5, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O1)); + 
memset(Qs->l1_Q6, 0, _O1_BYTE * _O1 * _O2); + memset(Qs->l1_Q9, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O2)); + + // l1_Q5 : _O1_BYTE * _O1 * _O1 + // l1_Q9 : _O1_BYTE * _O2 * _O2 + // l2_Q5 : _O2_BYTE * _V1 * _O1 + // l2_Q9 : _O2_BYTE * _V1 * _O2 + + unsigned char tempQ[_MAX_O_BYTE * _MAX_O * _MAX_O + 32]; + + memset(tempQ, 0, _O1_BYTE * _O1 * _O1); // l1_Q5 + batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q2, _O1, _O1_BYTE); // t1_tr*(F1*T1 + F2) + PQCLEAN_RAINBOWICLASSIC_CLEAN_UpperTrianglize(Qs->l1_Q5, tempQ, _O1, _O1_BYTE); // UT( ... ) // Q5 + + batch_trimatTr_madd(Qs->l1_Q2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); // Q2 + /* + Computing: + F1_T2 = F1 * t2 + F2_T3 = F2 * t3 + F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2 + Q_pk.l1_F3s[i] = F1_F1T_T2 + F2_T3 + Q_pk.l1_F6s[i] = T1tr* ( F1_F1T_T2 + F2_T3 ) + F2tr * t2 + Q_pk.l1_F9s[i] = UT( T2tr* ( F1_T2 + F2_T3 ) ) + */ + batch_trimat_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1*T2 + batch_mat_madd(Qs->l1_Q3, Fs->l1_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O1_BYTE); // F1_T2 + F2_T3 + + memset(tempQ, 0, _O1_BYTE * _O2 * _O2); // l1_Q9 + batch_matTr_madd(tempQ, t2, _V1, _V1_BYTE, _O2, Qs->l1_Q3, _O2, _O1_BYTE); // T2tr * ( F1_T2 + F2_T3 ) + PQCLEAN_RAINBOWICLASSIC_CLEAN_UpperTrianglize(Qs->l1_Q9, tempQ, _O2, _O1_BYTE); // Q9 + + batch_trimatTr_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1_F1T_T2 + F2_T3 // Q3 + + batch_bmatTr_madd(Qs->l1_Q6, Fs->l1_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F2tr*T2 + batch_matTr_madd(Qs->l1_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q3, _O2, _O1_BYTE); // Q6 + + /* + layer 2 + Computing: + Q1 = F1 + Q2 = F1_F1T*T1 + F2 + Q5 = UT( T1tr( F1*T1 + F2 ) + F5 ) + */ + memcpy(Qs->l2_Q1, Fs->l2_F1, _O2_BYTE * N_TRIANGLE_TERMS(_V1)); + + memcpy(Qs->l2_Q2, Fs->l2_F2, _O2_BYTE * _V1 * _O1); + batch_trimat_madd(Qs->l2_Q2, Fs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // F1*T1 + F2 + + memcpy(Qs->l2_Q5, Fs->l2_F5, _O2_BYTE * 
N_TRIANGLE_TERMS(_O1)); + memset(tempQ, 0, _O2_BYTE * _O1 * _O1); // l2_Q5 + batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l2_Q2, _O1, _O2_BYTE); // t1_tr*(F1*T1 + F2) + PQCLEAN_RAINBOWICLASSIC_CLEAN_UpperTrianglize(Qs->l2_Q5, tempQ, _O1, _O2_BYTE); // UT( ... ) // Q5 + + batch_trimatTr_madd(Qs->l2_Q2, Fs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // Q2 + + /* + Computing: + F1_T2 = F1 * t2 + F2_T3 = F2 * t3 + F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2 + + Q3 = F1_F1T*T2 + F2*T3 + F3 + Q9 = UT( T2tr*( F1*T2 + F2*T3 + F3 ) + T3tr*( F5*T3 + F6 ) ) + Q6 = T1tr*( F1_F1T*T2 + F2*T3 + F3 ) + F2Tr*T2 + F5_F5T*T3 + F6 + */ + memcpy(Qs->l2_Q3, Fs->l2_F3, _O2_BYTE * _V1 * _O2); + batch_trimat_madd(Qs->l2_Q3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1*T2 + F3 + batch_mat_madd(Qs->l2_Q3, Fs->l2_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F1_T2 + F2_T3 + F3 + + memset(tempQ, 0, _O2_BYTE * _O2 * _O2); // l2_Q9 + batch_matTr_madd(tempQ, t2, _V1, _V1_BYTE, _O2, Qs->l2_Q3, _O2, _O2_BYTE); // T2tr * ( ..... ) + + memcpy(Qs->l2_Q6, Fs->l2_F6, _O2_BYTE * _O1 * _O2); + + batch_trimat_madd(Qs->l2_Q6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6 + batch_matTr_madd(tempQ, Ts->t3, _O1, _O1_BYTE, _O2, Qs->l2_Q6, _O2, _O2_BYTE); // T2tr*( ..... ) + T3tr*( ..... 
) + memset(Qs->l2_Q9, 0, _O2_BYTE * N_TRIANGLE_TERMS(_O2)); + PQCLEAN_RAINBOWICLASSIC_CLEAN_UpperTrianglize(Qs->l2_Q9, tempQ, _O2, _O2_BYTE); // Q9 + + batch_trimatTr_madd(Qs->l2_Q3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1_F1T_T2 + F2_T3 + F3 // Q3 + + batch_bmatTr_madd(Qs->l2_Q6, Fs->l2_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6 + F2tr*T2 + batch_trimatTr_madd(Qs->l2_Q6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F2tr*T2 + F5_F5T*T3 + F6 + batch_matTr_madd(Qs->l2_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l2_Q3, _O2, _O2_BYTE); // Q6 +} +#define calculate_Q_from_F_impl calculate_Q_from_F_ref +void PQCLEAN_RAINBOWICLASSIC_CLEAN_calculate_Q_from_F(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) { + calculate_Q_from_F_impl(Qs, Fs, Ts); +} diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair_computation.h b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair_computation.h new file mode 100644 index 00000000..13a931e8 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/rainbow_keypair_computation.h @@ -0,0 +1,53 @@ +#ifndef _RAINBOW_KEYPAIR_COMP_H_ +#define _RAINBOW_KEYPAIR_COMP_H_ +/// @file rainbow_keypair_computation.h +/// @brief Functions for calculating pk/sk while generating keys. +/// +/// Defining an internal structure of public key. +/// Functions for calculating pk/sk for key generation. +/// + +#include "rainbow_keypair.h" + +/// @brief The (internal use) public key for rainbow +/// +/// The (internal use) public key for rainbow. The public +/// polynomials are divided into l1_Q1, l1_Q2, ... l1_Q9, +/// l2_Q1, .... , l2_Q9. 
+/// +typedef struct rainbow_extend_publickey { + unsigned char l1_Q1[_O1_BYTE * N_TRIANGLE_TERMS(_V1)]; + unsigned char l1_Q2[_O1_BYTE * _V1 * _O1]; + unsigned char l1_Q3[_O1_BYTE * _V1 * _O2]; + unsigned char l1_Q5[_O1_BYTE * N_TRIANGLE_TERMS(_O1)]; + unsigned char l1_Q6[_O1_BYTE * _O1 * _O2]; + unsigned char l1_Q9[_O1_BYTE * N_TRIANGLE_TERMS(_O2)]; + + unsigned char l2_Q1[_O2_BYTE * N_TRIANGLE_TERMS(_V1)]; + unsigned char l2_Q2[_O2_BYTE * _V1 * _O1]; + unsigned char l2_Q3[_O2_BYTE * _V1 * _O2]; + unsigned char l2_Q5[_O2_BYTE * N_TRIANGLE_TERMS(_O1)]; + unsigned char l2_Q6[_O2_BYTE * _O1 * _O2]; + unsigned char l2_Q9[_O2_BYTE * N_TRIANGLE_TERMS(_O2)]; +} ext_cpk_t; + +/// +/// @brief converting formats of public keys : from ext_cpk_t version to pk_t +/// +/// @param[out] pk - the classic public key. +/// @param[in] cpk - the internal public key. +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk); +///////////////////////////////////////////////// + +/// +/// @brief Computing public key from secret key +/// +/// @param[out] Qs - the public key +/// @param[in] Fs - parts of the secret key: l1_F1, l1_F2, l2_F1, l2_F2, l2_F3, l2_F5, l2_F6 +/// @param[in] Ts - parts of the secret key: T1, T4 (the t4 field is read as T2), T3 +/// +void PQCLEAN_RAINBOWICLASSIC_CLEAN_calculate_Q_from_F(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts); + + +#endif // _RAINBOW_KEYPAIR_COMP_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/sign.c b/crypto_sign/rainbow/rainbowI-classic/clean/sign.c new file mode 100644 index 00000000..7ffdca39 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/sign.c @@ -0,0 +1,74 @@ +/// @file sign.c +/// @brief the implementations for functions in api.h +/// +/// + +#include "api.h" +#include "rainbow.h" +#include "rainbow_config.h" +#include "rainbow_keypair.h" +#include "randombytes.h" +#include "utils_hash.h" +#include +#include + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { +
unsigned char sk_seed[LEN_SKSEED] = {0}; + randombytes(sk_seed, LEN_SKSEED); + + PQCLEAN_RAINBOWICLASSIC_CLEAN_generate_keypair((pk_t *)pk, (sk_t *)sk, sk_seed); + return 0; +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + unsigned char digest[_HASH_LEN]; + + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + + memcpy(sm, m, mlen); + smlen[0] = mlen + _SIGNATURE_BYTE; + + return PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_sign(sm + mlen, (const sk_t *)sk, digest); +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { + int rc; + if (_SIGNATURE_BYTE > smlen) { + rc = -1; + } else { + *mlen = smlen - _SIGNATURE_BYTE; + + unsigned char digest[_HASH_LEN]; + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, sm, *mlen); + + rc = PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_verify(digest, sm + mlen[0], (const pk_t *)pk); + } + if (!rc) { + memmove(m, sm, smlen - _SIGNATURE_BYTE); + } else { // bad signature + *mlen = (size_t) -1; + memset(m, 0, smlen); + } + return rc; +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned char digest[_HASH_LEN]; + + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + *siglen = _SIGNATURE_BYTE; + return PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_sign(sig, (const sk_t *)sk, digest); +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen != _SIGNATURE_BYTE) { + return -1; + } + unsigned char digest[_HASH_LEN]; + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + return PQCLEAN_RAINBOWICLASSIC_CLEAN_rainbow_verify(digest, sig, (const pk_t *)pk); +} diff --git 
a/crypto_sign/rainbow/rainbowI-classic/clean/utils_hash.c b/crypto_sign/rainbow/rainbowI-classic/clean/utils_hash.c new file mode 100644 index 00000000..eab04da8 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/utils_hash.c @@ -0,0 +1,50 @@ +/// @file utils_hash.c +/// @brief the adapter for SHA2 families. +/// +/// + +#include "utils_hash.h" +#include "rainbow_config.h" +#include "sha2.h" + +static inline int h(unsigned char *digest, const unsigned char *m, size_t mlen) { + sha256(digest, m, mlen); + return 0; +} + +static inline int expand_hash(unsigned char *digest, size_t n_digest, const unsigned char *hash) { + if (_HASH_LEN >= n_digest) { + for (size_t i = 0; i < n_digest; i++) { + digest[i] = hash[i]; + } + return 0; + } + for (size_t i = 0; i < _HASH_LEN; i++) { + digest[i] = hash[i]; + } + n_digest -= _HASH_LEN; + + while (_HASH_LEN <= n_digest) { + h(digest + _HASH_LEN, digest, _HASH_LEN); + + n_digest -= _HASH_LEN; + digest += _HASH_LEN; + } + unsigned char temp[_HASH_LEN]; + if (n_digest) { + h(temp, digest, _HASH_LEN); + for (size_t i = 0; i < n_digest; i++) { + digest[_HASH_LEN + i] = temp[i]; + } + } + return 0; +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(unsigned char *digest, + size_t len_digest, + const unsigned char *m, + size_t mlen) { + unsigned char buf[_HASH_LEN]; + h(buf, m, mlen); + return expand_hash(digest, len_digest, buf); +} diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/utils_hash.h b/crypto_sign/rainbow/rainbowI-classic/clean/utils_hash.h new file mode 100644 index 00000000..5faa067a --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/utils_hash.h @@ -0,0 +1,11 @@ +#ifndef _UTILS_HASH_H_ +#define _UTILS_HASH_H_ +/// @file utils_hash.h +/// @brief the interface for adapting hash functions. 
+/// + +#include + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(unsigned char *digest, size_t len_digest, const unsigned char *m, size_t mlen); + +#endif // _UTILS_HASH_H_ diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/utils_prng.c b/crypto_sign/rainbow/rainbowI-classic/clean/utils_prng.c new file mode 100644 index 00000000..4d6c9b95 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/utils_prng.c @@ -0,0 +1,97 @@ +/// @file utils_prng.c +/// @brief The implementation of PRNG related functions. +/// + +#include "utils_prng.h" +#include "aes.h" +#include "randombytes.h" +#include "utils_hash.h" +#include +#include + +static void prng_update(const unsigned char *provided_data, + unsigned char *Key, + unsigned char *V) { + unsigned char temp[48]; + aes256ctx ctx; + aes256_ecb_keyexp(&ctx, Key); + for (int i = 0; i < 3; i++) { + //increment V + for (int j = 15; j >= 0; j--) { + if (V[j] == 0xff) { + V[j] = 0x00; + } else { + V[j]++; + break; + } + } + aes256_ecb(temp + 16 * i, V, 1, &ctx); + } + if (provided_data != NULL) { + for (int i = 0; i < 48; i++) { + temp[i] ^= provided_data[i]; + } + } + aes256_ctx_release(&ctx); + memcpy(Key, temp, 32); + memcpy(V, temp + 32, 16); +} +static void randombytes_init_with_state(prng_t *state, + unsigned char *entropy_input_48bytes) { + memset(state->Key, 0x00, 32); + memset(state->V, 0x00, 16); + prng_update(entropy_input_48bytes, state->Key, state->V); +} + +static int randombytes_with_state(prng_t *state, + unsigned char *x, + size_t xlen) { + + unsigned char block[16]; + int i = 0; + + aes256ctx ctx; + aes256_ecb_keyexp(&ctx, state->Key); + + while (xlen > 0) { + //increment V + for (int j = 15; j >= 0; j--) { + if (state->V[j] == 0xff) { + state->V[j] = 0x00; + } else { + state->V[j]++; + break; + } + } + aes256_ecb(block, state->V, 1, &ctx); + if (xlen > 15) { + memcpy(x + i, block, 16); + i += 16; + xlen -= 16; + } else { + memcpy(x + i, block, xlen); + xlen = 0; + } + } + aes256_ctx_release(&ctx); + 
prng_update(NULL, state->Key, state->V); + return 0; +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_set(prng_t *ctx, const void *prng_seed, unsigned long prng_seedlen) { + unsigned char seed[48]; + if (prng_seedlen >= 48) { + memcpy(seed, prng_seed, 48); + } else { + memcpy(seed, prng_seed, prng_seedlen); + PQCLEAN_RAINBOWICLASSIC_CLEAN_hash_msg(seed + prng_seedlen, 48 - (unsigned)prng_seedlen, (const unsigned char *)prng_seed, prng_seedlen); + } + + randombytes_init_with_state(ctx, seed); + + return 0; +} + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen(prng_t *ctx, unsigned char *out, unsigned long outlen) { + return randombytes_with_state(ctx, out, outlen); +} diff --git a/crypto_sign/rainbow/rainbowI-classic/clean/utils_prng.h b/crypto_sign/rainbow/rainbowI-classic/clean/utils_prng.h new file mode 100644 index 00000000..76b869f8 --- /dev/null +++ b/crypto_sign/rainbow/rainbowI-classic/clean/utils_prng.h @@ -0,0 +1,18 @@ +#ifndef _UTILS_PRNG_H_ +#define _UTILS_PRNG_H_ +/// @file utils_prng.h +/// @brief the interface for adapting PRNG functions. 
+/// +/// + +#include "randombytes.h" + +typedef struct { + unsigned char Key[32]; + unsigned char V[16]; +} prng_t; + +int PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_set(prng_t *ctx, const void *prng_seed, unsigned long prng_seedlen); +int PQCLEAN_RAINBOWICLASSIC_CLEAN_prng_gen(prng_t *ctx, unsigned char *out, unsigned long outlen); + +#endif // _UTILS_PRNG_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/META.yml b/crypto_sign/rainbow/rainbowIII-classic/META.yml new file mode 100644 index 00000000..1f6e78f4 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/META.yml @@ -0,0 +1,20 @@ +name: "RAINBOW(256,68,32,48) - classic" +type: signature +claimed-nist-level: 3 +length-public-key: 882080 +length-secret-key: 626048 +length-signature: 164 +nistkat-sha256: 1eb9bb6e63cfdbd05a6eaca9989e969fd234b110b67ff7e6373e1af080b35f41 +testvectors-sha256: 79a7c069612ceb35b6d702a20d9cfdffe7923429820bc4019d3ab9b4b23a1b1b +principal-submitters: + - Jintai Ding +auxiliary-submitters: + - Ming-Shing Chen + - Matthias Kannwischer + - Jacques Patarin + - Albrecht Petzoldt + - Dieter Schmidt + - Bo-Yin Yang +implementations: + - name: clean + version: https://github.com/fast-crypto-lab/rainbow-submission-round2/commit/173ada0e077e1b9dbd8e4a78994f87acc0c92263 diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/LICENSE b/crypto_sign/rainbow/rainbowIII-classic/clean/LICENSE new file mode 100644 index 00000000..cb00a6e3 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/LICENSE @@ -0,0 +1,8 @@ +`Software implementation of Rainbow for NIST R2 submission' by Ming-Shing Chen + +To the extent possible under law, the person who associated CC0 with +`Software implementation of Rainbow for NIST R2 submission' has waived all copyright and related or neighboring rights +to `Software implementation of Rainbow for NIST R2 submission'. + +You should have received a copy of the CC0 legalcode along with this +work. If not, see . 
diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/Makefile.Microsoft_nmake b/crypto_sign/rainbow/rainbowIII-classic/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..f94fcf58 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=librainbowIII-classic_clean.lib +OBJECTS = blas_comm.obj parallel_matrix_op.obj rainbow.obj rainbow_keypair.obj rainbow_keypair_computation.obj sign.obj utils_hash.obj utils_prng.obj blas.obj gf.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/api.h b/crypto_sign/rainbow/rainbowIII-classic/clean/api.h new file mode 100644 index 00000000..8b4d1661 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/api.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_RAINBOWIIICLASSIC_CLEAN_API_H +#define PQCLEAN_RAINBOWIIICLASSIC_CLEAN_API_H + +#include +#include + +#define PQCLEAN_RAINBOWIIICLASSIC_CLEAN_CRYPTO_SECRETKEYBYTES 626048 +#define PQCLEAN_RAINBOWIIICLASSIC_CLEAN_CRYPTO_PUBLICKEYBYTES 882080 +#define PQCLEAN_RAINBOWIIICLASSIC_CLEAN_CRYPTO_BYTES 164 +#define PQCLEAN_RAINBOWIIICLASSIC_CLEAN_CRYPTO_ALGNAME "RAINBOW(256,68,32,48) - classic" + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign(uint8_t *sm, size_t 
*smlen, + const uint8_t *m, size_t mlen, + const uint8_t *sk); + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/blas.c b/crypto_sign/rainbow/rainbowIII-classic/clean/blas.c new file mode 100644 index 00000000..aaaefdf5 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/blas.c @@ -0,0 +1,31 @@ +#include "blas.h" +#include "gf.h" + +#include + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_predicated_add(uint8_t *accu_b, uint8_t predicate, const uint8_t *a, size_t _num_byte) { + uint8_t pr_u8 = (uint8_t) ((uint8_t) 0 - predicate); + for (size_t i = 0; i < _num_byte; i++) { + accu_b[i] ^= (a[i] & pr_u8); + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(uint8_t *accu_b, const uint8_t *a, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + accu_b[i] ^= a[i]; + } +} + + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_mul_scalar(uint8_t *a, uint8_t b, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + a[i] = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(a[i], b); + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(uint8_t *accu_c, const uint8_t *a, uint8_t gf256_b, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + accu_c[i] ^= PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(a[i], gf256_b); + } +} + diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/blas.h b/crypto_sign/rainbow/rainbowIII-classic/clean/blas.h new file mode 100644 index 00000000..035cec12 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/blas.h @@ -0,0 +1,19 @@ +#ifndef _BLAS_H_ +#define _BLAS_H_ +/// @file blas.h +/// @brief Functions for implementing basic linear algebra functions. 
+/// + +#include "rainbow_config.h" +#include +#include + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_predicated_add(uint8_t *accu_b, uint8_t predicate, const uint8_t *a, size_t _num_byte); +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(uint8_t *accu_b, const uint8_t *a, size_t _num_byte); + + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_mul_scalar(uint8_t *a, uint8_t b, size_t _num_byte); +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(uint8_t *accu_c, const uint8_t *a, uint8_t gf256_b, size_t _num_byte); + + +#endif // _BLAS_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/blas_comm.c b/crypto_sign/rainbow/rainbowIII-classic/clean/blas_comm.c new file mode 100644 index 00000000..77aedcd5 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/blas_comm.c @@ -0,0 +1,144 @@ +/// @file blas_comm.c +/// @brief The standard implementations for blas_comm.h +/// + +#include "blas_comm.h" +#include "blas.h" +#include "gf.h" +#include "rainbow_config.h" + +#include +#include + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(uint8_t *b, unsigned int _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + b[i] = 0; + } +} +/// @brief get an element from GF(256) vector . +/// +/// @param[in] a - the input vector a. +/// @param[in] i - the index in the vector a. +/// @return the value of the element. 
+/// +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(const uint8_t *a, unsigned int i) { + return a[i]; +} + +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_is_zero(const uint8_t *a, unsigned int _num_byte) { + uint8_t r = 0; + while (_num_byte--) { + r |= a[0]; + a++; + } + return (0 == r); +} + +/// polynomial multplication +/// School boook +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_polymul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int _num) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(c, _num * 2 - 1); + for (unsigned int i = 0; i < _num; i++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(c + i, a, b[i], _num); + } +} + +static void gf256mat_prod_ref(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(c, n_A_vec_byte); + for (unsigned int i = 0; i < n_A_width; i++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(c, matA, b[i], n_A_vec_byte); + matA += n_A_vec_byte; + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_mul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int len_vec) { + unsigned int n_vec_byte = len_vec; + for (unsigned int k = 0; k < len_vec; k++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(c, n_vec_byte); + const uint8_t *bk = b + n_vec_byte * k; + for (unsigned int i = 0; i < len_vec; i++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(c, a + n_vec_byte * i, bk[i], n_vec_byte); + } + c += n_vec_byte; + } +} + +static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsigned int w) { + unsigned int r8 = 1; + + for (unsigned int i = 0; i < h; i++) { + uint8_t *ai = mat + w * i; + unsigned int skip_len_align4 = i & ((unsigned int)~0x3); + + for (unsigned int j = i + 1; j < h; j++) { + uint8_t *aj = mat + w * j; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, 
w - skip_len_align4); + } + r8 &= PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_is_nonzero(ai[i]); + uint8_t pivot = ai[i]; + pivot = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_inv(pivot); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_mul_scalar(ai + skip_len_align4, pivot, w - skip_len_align4); + for (unsigned int j = 0; j < h; j++) { + if (i == j) { + continue; + } + uint8_t *aj = mat + w * j; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(aj + skip_len_align4, ai + skip_len_align4, aj[i], w - skip_len_align4); + } + } + + return r8; +} + +static unsigned int gf256mat_solve_linear_eq_ref(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n) { + uint8_t mat[64 * 64]; + for (unsigned int i = 0; i < n; i++) { + memcpy(mat + i * (n + 1), inp_mat + i * n, n); + mat[i * (n + 1) + n] = c_terms[i]; + } + unsigned int r8 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_gauss_elim(mat, n, n + 1); + for (unsigned int i = 0; i < n; i++) { + sol[i] = mat[i * (n + 1) + n]; + } + return r8; +} + +static inline void gf256mat_submat(uint8_t *mat2, unsigned int w2, unsigned int st, const uint8_t *mat, unsigned int w, unsigned int h) { + for (unsigned int i = 0; i < h; i++) { + for (unsigned int j = 0; j < w2; j++) { + mat2[i * w2 + j] = mat[i * w + st + j]; + } + } +} + +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_inv(uint8_t *inv_a, const uint8_t *a, unsigned int H, uint8_t *buffer) { + uint8_t *aa = buffer; + for (unsigned int i = 0; i < H; i++) { + uint8_t *ai = aa + i * 2 * H; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(ai, 2 * H); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(ai, a + i * H, H); + ai[H + i] = 1; + } + unsigned int r8 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_gauss_elim(aa, H, 2 * H); + gf256mat_submat(inv_a, H, H, aa, 2 * H, H); + return r8; +} + + +// choosing the implementations depends on the macros _BLAS_AVX2_ and _BLAS_SSE + +#define gf256mat_prod_impl gf256mat_prod_ref +#define gf256mat_gauss_elim_impl gf256mat_gauss_elim_ref +#define 
gf256mat_solve_linear_eq_impl gf256mat_solve_linear_eq_ref +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_prod(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b) { + gf256mat_prod_impl(c, matA, n_A_vec_byte, n_A_width, b); +} + +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_gauss_elim(uint8_t *mat, unsigned int h, unsigned int w) { + return gf256mat_gauss_elim_impl(mat, h, w); +} + +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_solve_linear_eq(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n) { + return gf256mat_solve_linear_eq_impl(sol, inp_mat, c_terms, n); +} + diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/blas_comm.h b/crypto_sign/rainbow/rainbowIII-classic/clean/blas_comm.h new file mode 100644 index 00000000..c21256e7 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/blas_comm.h @@ -0,0 +1,90 @@ +#ifndef _BLAS_COMM_H_ +#define _BLAS_COMM_H_ +/// @file blas_comm.h +/// @brief Common functions for linear algebra. +/// + +#include "rainbow_config.h" +#include + +/// @brief set a vector to 0. +/// +/// @param[in,out] b - the vector b. +/// @param[in] _num_byte - number of bytes for the vector b. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(uint8_t *b, unsigned int _num_byte); + +/// @brief get an element from GF(256) vector . +/// +/// @param[in] a - the input vector a. +/// @param[in] i - the index in the vector a. +/// @return the value of the element. +/// +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(const uint8_t *a, unsigned int i); + +/// @brief check if a vector is 0. +/// +/// @param[in] a - the vector a. +/// @param[in] _num_byte - number of bytes for the vector a. +/// @return 1(true) if a is 0. 0(false) else. 
+/// +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_is_zero(const uint8_t *a, unsigned int _num_byte); + +/// @brief polynomial multiplication: c = a*b +/// +/// @param[out] c - the output polynomial c +/// @param[in] a - the vector a. +/// @param[in] b - the vector b. +/// @param[in] _num - number of elements for the polynomials a and b. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_polymul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int _num); + +/// @brief matrix-vector multiplication: c = matA * b , in GF(256) +/// +/// @param[out] c - the output vector c +/// @param[in] matA - a column-major matrix A. +/// @param[in] n_A_vec_byte - the size of column vectors in bytes. +/// @param[in] n_A_width - the width of matrix A. +/// @param[in] b - the vector b. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_prod(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b); + +/// @brief matrix-matrix multiplication: c = a * b , in GF(256) +/// +/// @param[out] c - the output matrix c +/// @param[in] c - a matrix a. +/// @param[in] b - a matrix b. +/// @param[in] len_vec - the length of column vectors. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_mul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int len_vec); + +/// @brief Gauss elimination for a matrix, in GF(256) +/// +/// @param[in,out] mat - the matrix. +/// @param[in] h - the height of the matrix. +/// @param[in] w - the width of the matrix. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_gauss_elim(uint8_t *mat, unsigned int h, unsigned int w); + +/// @brief Solving linear equations, in GF(256) +/// +/// @param[out] sol - the solutions. +/// @param[in] inp_mat - the matrix parts of input equations. +/// @param[in] c_terms - the constant terms of the input equations. +/// @param[in] n - the number of equations. +/// @return 1(true) if success. 
0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_solve_linear_eq(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n); + +/// @brief Computing the inverse matrix, in GF(256) +/// +/// @param[out] inv_a - the output of matrix a. +/// @param[in] a - a matrix a. +/// @param[in] H - height of matrix a, i.e., matrix a is an HxH matrix. +/// @param[in] buffer - The buffer for computations. it has to be as large as 2 input matrixes. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_inv(uint8_t *inv_a, const uint8_t *a, unsigned int H, uint8_t *buffer); + +#endif // _BLAS_COMM_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/gf.c b/crypto_sign/rainbow/rainbowIII-classic/clean/gf.c new file mode 100644 index 00000000..b6043f9a --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/gf.c @@ -0,0 +1,91 @@ +#include "gf.h" + +//// gf4 := gf2[x]/x^2+x+1 +static inline uint8_t gf4_mul_2(uint8_t a) { + uint8_t r = (uint8_t)(a << 1); + r ^= (uint8_t)((a >> 1) * 7); + return r; +} + +static inline uint8_t gf4_mul(uint8_t a, uint8_t b) { + uint8_t r = (uint8_t)(a * (b & 1)); + return r ^ (uint8_t)(gf4_mul_2(a) * (b >> 1)); +} + +static inline uint8_t gf4_squ(uint8_t a) { + return a ^ (a >> 1); +} + +//// gf16 := gf4[y]/y^2+y+x +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf16_mul(uint8_t a, uint8_t b) { + uint8_t a0 = a & 3; + uint8_t a1 = (a >> 2); + uint8_t b0 = b & 3; + uint8_t b1 = (b >> 2); + uint8_t a0b0 = gf4_mul(a0, b0); + uint8_t a1b1 = gf4_mul(a1, b1); + uint8_t a0b1_a1b0 = gf4_mul(a0 ^ a1, b0 ^ b1) ^ a0b0 ^ a1b1; + uint8_t a1b1_x2 = gf4_mul_2(a1b1); + return (uint8_t)((a0b1_a1b0 ^ a1b1) << 2 ^ a0b0 ^ a1b1_x2); +} + +static inline uint8_t gf16_squ(uint8_t a) { + uint8_t a0 = a & 3; + uint8_t a1 = (a >> 2); + a1 = gf4_squ(a1); + uint8_t a1squ_x2 = gf4_mul_2(a1); + return (uint8_t)((a1 << 2) ^ a1squ_x2 ^ 
gf4_squ(a0)); +} + +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_is_nonzero(uint8_t a) { + unsigned int a8 = a; + unsigned int r = ((unsigned int)0) - a8; + r >>= 8; + return r & 1; +} + +static inline uint8_t gf4_mul_3(uint8_t a) { + uint8_t msk = (uint8_t)((a - 2) >> 1); + return (uint8_t)((msk & ((int)a * 3)) | ((~msk) & ((int)a - 1))); +} +static inline uint8_t gf16_mul_8(uint8_t a) { + uint8_t a0 = a & 3; + uint8_t a1 = a >> 2; + return (uint8_t)((gf4_mul_2(a0 ^ a1) << 2) | gf4_mul_3(a1)); +} + +// gf256 := gf16[X]/X^2+X+xy +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(uint8_t a, uint8_t b) { + uint8_t a0 = a & 15; + uint8_t a1 = (a >> 4); + uint8_t b0 = b & 15; + uint8_t b1 = (b >> 4); + uint8_t a0b0 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf16_mul(a0, b0); + uint8_t a1b1 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf16_mul(a1, b1); + uint8_t a0b1_a1b0 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf16_mul(a0 ^ a1, b0 ^ b1) ^ a0b0 ^ a1b1; + uint8_t a1b1_x8 = gf16_mul_8(a1b1); + return (uint8_t)((a0b1_a1b0 ^ a1b1) << 4 ^ a0b0 ^ a1b1_x8); +} + +static inline uint8_t gf256_squ(uint8_t a) { + uint8_t a0 = a & 15; + uint8_t a1 = (a >> 4); + a1 = gf16_squ(a1); + uint8_t a1squ_x8 = gf16_mul_8(a1); + return (uint8_t)((a1 << 4) ^ a1squ_x8 ^ gf16_squ(a0)); +} + +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_inv(uint8_t a) { + // 128+64+32+16+8+4+2 = 254 + uint8_t a2 = gf256_squ(a); + uint8_t a4 = gf256_squ(a2); + uint8_t a8 = gf256_squ(a4); + uint8_t a4_2 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(a4, a2); + uint8_t a8_4_2 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(a4_2, a8); + uint8_t a64_ = gf256_squ(a8_4_2); + a64_ = gf256_squ(a64_); + a64_ = gf256_squ(a64_); + uint8_t a64_2 = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(a64_, a8_4_2); + uint8_t a128_ = gf256_squ(a64_2); + return PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(a2, a128_); +} diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/gf.h b/crypto_sign/rainbow/rainbowIII-classic/clean/gf.h new file mode 100644 index 
00000000..021c161d --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/gf.h @@ -0,0 +1,19 @@ +#ifndef _GF16_H_ +#define _GF16_H_ + +#include "rainbow_config.h" +#include + +/// @file gf16.h +/// @brief Library for arithmetics in GF(16) and GF(256) +/// + +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf16_mul(uint8_t a, uint8_t b); + + +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_is_nonzero(uint8_t a); +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_inv(uint8_t a); +uint8_t PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256_mul(uint8_t a, uint8_t b); + + +#endif // _GF16_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/parallel_matrix_op.c b/crypto_sign/rainbow/rainbowIII-classic/clean/parallel_matrix_op.c new file mode 100644 index 00000000..dae3807d --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/parallel_matrix_op.c @@ -0,0 +1,183 @@ +/// @file parallel_matrix_op.c +/// @brief the standard implementations for functions in parallel_matrix_op.h +/// +/// the standard implementations for functions in parallel_matrix_op.h +/// + +#include "parallel_matrix_op.h" +#include "blas.h" +#include "blas_comm.h" + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle(UT) matrix. +/// +/// @param[in] i_row - the i-th row in an upper-triangle matrix. +/// @param[in] j_col - the j-th column in an upper-triangle matrix. +/// @param[in] dim - the dimension of the upper-triangle matrix, i.e., an dim x dim matrix. +/// @return the corresponding index in an array storage. +/// +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(unsigned int i_row, unsigned int j_col, unsigned int dim) { + return (dim + dim - i_row + 1) * i_row / 2 + j_col - i_row; +} + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle or lower-triangle matrix. +/// +/// @param[in] i_row - the i-th row in a triangle matrix. +/// @param[in] j_col - the j-th column in a triangle matrix. 
+/// @param[in] dim - the dimension of the triangle matrix, i.e., an dim x dim matrix. +/// @return the corresponding index in an array storage. +/// +static inline unsigned int idx_of_2trimat(unsigned int i_row, unsigned int j_col, unsigned int n_var) { + if (i_row > j_col) { + return PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(j_col, i_row, n_var); + } + return PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i_row, j_col, n_var); +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_UpperTrianglize(unsigned char *btriC, const unsigned char *bA, unsigned int Awidth, unsigned int size_batch) { + unsigned char *runningC = btriC; + unsigned int Aheight = Awidth; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < i; j++) { + unsigned int idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(j, i, Aheight); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(btriC + idx * size_batch, bA + size_batch * (i * Awidth + j), size_batch); + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(runningC, bA + size_batch * (i * Awidth + i), size_batch * (Aheight - i)); + runningC += size_batch * (Aheight - i); + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Awidth = Bheight; + unsigned int Aheight = Awidth; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (k < i) { + continue; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(bC, &btriA[(k - i) * size_batch], PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + btriA += (Aheight - i) * size_batch; + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimatTr_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, 
unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Aheight = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (i < k) { + continue; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(bC, &btriA[size_batch * (PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(k, i, Aheight))], PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_2trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Aheight = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (i == k) { + continue; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(bC, &btriA[size_batch * (idx_of_2trimat(i, k, Aheight))], PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_matTr_madd_gf256(unsigned char *bC, const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Atr_height = Awidth; + unsigned int Atr_width = Aheight; + for (unsigned int i = 0; i < Atr_height; i++) { + for (unsigned int j = 0; j < Atr_width; j++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(bC, &bB[j * Bwidth * size_batch], PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(&A_to_tr[size_Acolvec * i], j), size_batch * Bwidth); + } + bC += size_batch * Bwidth; + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_bmatTr_madd_gf256(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + 
const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + const unsigned char *bA = bA_to_tr; + unsigned int Aheight = Awidth_before_tr; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(bC, &bA[size_batch * (i + k * Aheight)], PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_mat_madd_gf256(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Awidth = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(bC, &bA[k * size_batch], PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + bA += (Awidth) * size_batch; + } +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_trimat_eval_gf256(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch) { + unsigned char tmp[256]; + + unsigned char _x[256]; + for (unsigned int i = 0; i < dim; i++) { + _x[i] = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(x, i); + } + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(y, size_batch); + for (unsigned int i = 0; i < dim; i++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(tmp, size_batch); + for (unsigned int j = i; j < dim; j++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(tmp, trimat, _x[j], size_batch); + trimat += size_batch; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(y, tmp, _x[i], size_batch); + } +} + +void 
PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_recmat_eval_gf256(unsigned char *z, const unsigned char *y, unsigned int dim_y, const unsigned char *mat, + const unsigned char *x, unsigned dim_x, unsigned size_batch) { + unsigned char tmp[128]; + + unsigned char _x[128]; + for (unsigned int i = 0; i < dim_x; i++) { + _x[i] = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(x, i); + } + unsigned char _y[128]; + for (unsigned int i = 0; i < dim_y; i++) { + _y[i] = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele(y, i); + } + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(z, size_batch); + for (unsigned int i = 0; i < dim_y; i++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(tmp, size_batch); + for (unsigned int j = 0; j < dim_x; j++) { + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(tmp, mat, _x[j], size_batch); + mat += size_batch; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd(z, tmp, _y[i], size_batch); + } +} + diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/parallel_matrix_op.h b/crypto_sign/rainbow/rainbowIII-classic/clean/parallel_matrix_op.h new file mode 100644 index 00000000..96a874c7 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/parallel_matrix_op.h @@ -0,0 +1,260 @@ +#ifndef _P_MATRIX_OP_H_ +#define _P_MATRIX_OP_H_ +/// @file parallel_matrix_op.h +/// @brief Librarys for operations of batched matrixes. +/// +/// + +//////////////// Section: triangle matrix <-> rectangle matrix /////////////////////////////////// + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle(UT) matrix. +/// +/// @param[in] i_row - the i-th row in an upper-triangle matrix. +/// @param[in] j_col - the j-th column in an upper-triangle matrix. +/// @param[in] dim - the dimension of the upper-triangle matrix, i.e., an dim x dim matrix. +/// @return the corresponding index in an array storage. 
+/// +unsigned int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(unsigned int i_row, unsigned int j_col, unsigned int dim); + +/// +/// @brief Upper trianglize a rectangle matrix to the corresponding upper-trangle matrix. +/// +/// @param[out] btriC - the batched upper-trianglized matrix C. +/// @param[in] bA - a batched retangle matrix A. +/// @param[in] bwidth - the width of the batched matrix A, i.e., A is a Awidth x Awidth matrix. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_UpperTrianglize(unsigned char *btriC, const unsigned char *bA, unsigned int Awidth, unsigned int size_batch); + +//////////////////// Section: matrix multiplications /////////////////////////////// + +/// +/// @brief bC += btriA * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA^Tr * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. A will be transposed while multiplying. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimatTr_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA^Tr * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A, which will be transposed while multiplying. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimatTr_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += (btriA + btriA^Tr) *B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. The operand for multiplication is (btriA + btriA^Tr). +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. 
+/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_2trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += (btriA + btriA^Tr) *B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. The operand for multiplication is (btriA + btriA^Tr). +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_2trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += A^Tr * bB , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] A_to_tr - a column-major matrix A. The operand for multiplication is A^Tr. +/// @param[in] Aheight - the height of A. +/// @param[in] size_Acolvec - the size of a column vector in A. +/// @param[in] Awidth - the width of A. +/// @param[in] bB - a batched matrix B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_matTr_madd_gf16(unsigned char *bC, + const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += A^Tr * bB , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] A_to_tr - a column-major matrix A. The operand for multiplication is A^Tr. +/// @param[in] Aheight - the height of A. +/// @param[in] size_Acolvec - the size of a column vector in A. +/// @param[in] Awidth - the width of A. +/// @param[in] bB - a batched matrix B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_matTr_madd_gf256(unsigned char *bC, + const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA^Tr * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA_to_tr - a batched matrix A. The operand for multiplication is (bA^Tr). +/// @param[in] Awidth_befor_tr - the width of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_bmatTr_madd_gf16(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA^Tr * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA_to_tr - a batched matrix A. 
The operand for multiplication is (bA^Tr). +/// @param[in] Awidth_befor_tr - the width of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_bmatTr_madd_gf256(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA - a batched matrix A. +/// @param[in] Aheigh - the height of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_mat_madd_gf16(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA - a batched matrix A. +/// @param[in] Aheigh - the height of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_mat_madd_gf256(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +//////////////////// Section: "quadratric" matrix evaluation /////////////////////////////// + +/// +/// @brief y = x^Tr * trimat * x , in GF(16) +/// +/// @param[out] y - the returned batched element y. +/// @param[in] trimat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim - the dimension of matrix trimat (and x). +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_trimat_eval_gf16(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch); + +/// +/// @brief y = x^Tr * trimat * x , in GF(256) +/// +/// @param[out] y - the returned batched element y. +/// @param[in] trimat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim - the dimension of matrix trimat (and x). +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_trimat_eval_gf256(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch); + +/// +/// @brief z = y^Tr * mat * x , in GF(16) +/// +/// @param[out] z - the returned batched element z. +/// @param[in] y - an input vector y. +/// @param[in] dim_y - the length of y. +/// @param[in] mat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim_x - the length of x. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_recmat_eval_gf16(unsigned char *z, const unsigned char *y, unsigned int dim_y, + const unsigned char *mat, const unsigned char *x, unsigned int dim_x, unsigned int size_batch); + +/// +/// @brief z = y^Tr * mat * x , in GF(256) +/// +/// @param[out] z - the returned batched element z. +/// @param[in] y - an input vector y. +/// @param[in] dim_y - the length of y. +/// @param[in] mat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim_x - the length of x. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_recmat_eval_gf256(unsigned char *z, const unsigned char *y, unsigned int dim_y, + const unsigned char *mat, const unsigned char *x, unsigned int dim_x, unsigned int size_batch); + +#endif // _P_MATRIX_OP_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow.c b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow.c new file mode 100644 index 00000000..3d93036f --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow.c @@ -0,0 +1,168 @@ +/// @file rainbow.c +/// @brief The standard implementations for functions in rainbow.h +/// + +#include "blas.h" +#include "parallel_matrix_op.h" +#include "rainbow.h" +#include "rainbow_blas.h" +#include "rainbow_config.h" +#include "rainbow_keypair.h" +#include "utils_hash.h" +#include "utils_prng.h" +#include +#include +#include + +#define MAX_ATTEMPT_FRMAT 128 + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_sign(uint8_t *signature, const sk_t *sk, const uint8_t *_digest) { + uint8_t mat_l1[_O1 * _O1_BYTE]; + uint8_t mat_l2[_O2 * _O2_BYTE]; + uint8_t mat_buffer[2 * _MAX_O * _MAX_O_BYTE]; + + // setup PRNG + prng_t prng_sign; + uint8_t prng_preseed[LEN_SKSEED + _HASH_LEN]; + memcpy(prng_preseed, sk->sk_seed, LEN_SKSEED); + memcpy(prng_preseed + LEN_SKSEED, _digest, _HASH_LEN); // prng_preseed = sk_seed || digest + 
uint8_t prng_seed[_HASH_LEN]; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(prng_seed, _HASH_LEN, prng_preseed, _HASH_LEN + LEN_SKSEED); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_set(&prng_sign, prng_seed, _HASH_LEN); // seed = H( sk_seed || digest ) + for (unsigned int i = 0; i < LEN_SKSEED + _HASH_LEN; i++) { + prng_preseed[i] ^= prng_preseed[i]; // clean + } + for (unsigned int i = 0; i < _HASH_LEN; i++) { + prng_seed[i] ^= prng_seed[i]; // clean + } + + // roll vinegars. + uint8_t vinegar[_V1_BYTE]; + unsigned int n_attempt = 0; + unsigned int l1_succ = 0; + while (!l1_succ) { + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + break; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen(&prng_sign, vinegar, _V1_BYTE); // generating vinegars + gfmat_prod(mat_l1, sk->l1_F2, _O1 * _O1_BYTE, _V1, vinegar); // generating the linear equations for layer 1 + l1_succ = gfmat_inv(mat_l1, mat_l1, _O1, mat_buffer); // check if the linear equation solvable + n_attempt++; + } + + // Given the vinegars, pre-compute variables needed for layer 2 + uint8_t r_l1_F1[_O1_BYTE] = {0}; + uint8_t r_l2_F1[_O2_BYTE] = {0}; + batch_quad_trimat_eval(r_l1_F1, sk->l1_F1, vinegar, _V1, _O1_BYTE); + batch_quad_trimat_eval(r_l2_F1, sk->l2_F1, vinegar, _V1, _O2_BYTE); + uint8_t mat_l2_F3[_O2 * _O2_BYTE]; + uint8_t mat_l2_F2[_O1 * _O2_BYTE]; + gfmat_prod(mat_l2_F3, sk->l2_F3, _O2 * _O2_BYTE, _V1, vinegar); + gfmat_prod(mat_l2_F2, sk->l2_F2, _O1 * _O2_BYTE, _V1, vinegar); + + // Some local variables. 
+ uint8_t _z[_PUB_M_BYTE]; + uint8_t y[_PUB_M_BYTE]; + uint8_t *x_v1 = vinegar; + uint8_t x_o1[_O1_BYTE]; + uint8_t x_o2[_O2_BYTE]; + + uint8_t digest_salt[_HASH_LEN + _SALT_BYTE]; + memcpy(digest_salt, _digest, _HASH_LEN); + uint8_t *salt = digest_salt + _HASH_LEN; + + uint8_t temp_o[_MAX_O_BYTE + 32] = {0}; + unsigned int succ = 0; + while (!succ) { + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + break; + } + // The computation: H(digest||salt) --> z --S--> y --C-map--> x --T--> w + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen(&prng_sign, salt, _SALT_BYTE); // roll the salt + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(_z, _PUB_M_BYTE, digest_salt, _HASH_LEN + _SALT_BYTE); // H(digest||salt) + + // y = S^-1 * z + memcpy(y, _z, _PUB_M_BYTE); // identity part of S + gfmat_prod(temp_o, sk->s1, _O1_BYTE, _O2, _z + _O1_BYTE); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(y, temp_o, _O1_BYTE); + + // Central Map: + // layer 1: calculate x_o1 + memcpy(temp_o, r_l1_F1, _O1_BYTE); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(temp_o, y, _O1_BYTE); + gfmat_prod(x_o1, mat_l1, _O1_BYTE, _O1, temp_o); + + // layer 2: calculate x_o2 + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_set_zero(temp_o, _O2_BYTE); + gfmat_prod(temp_o, mat_l2_F2, _O2_BYTE, _O1, x_o1); // F2 + batch_quad_trimat_eval(mat_l2, sk->l2_F5, x_o1, _O1, _O2_BYTE); // F5 + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(temp_o, mat_l2, _O2_BYTE); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(temp_o, r_l2_F1, _O2_BYTE); // F1 + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(temp_o, y + _O1_BYTE, _O2_BYTE); + + // generate the linear equations of the 2nd layer + gfmat_prod(mat_l2, sk->l2_F6, _O2 * _O2_BYTE, _O1, x_o1); // F6 + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(mat_l2, mat_l2_F3, _O2 * _O2_BYTE); // F3 + succ = gfmat_inv(mat_l2, mat_l2, _O2, mat_buffer); + gfmat_prod(x_o2, mat_l2, _O2_BYTE, _O2, temp_o); // solve l2 eqs + + n_attempt++; + }; + // w = T^-1 * y + uint8_t w[_PUB_N_BYTE]; + // identity part of T. 
+ memcpy(w, x_v1, _V1_BYTE); + memcpy(w + _V1_BYTE, x_o1, _O1_BYTE); + memcpy(w + _V2_BYTE, x_o2, _O2_BYTE); + // Computing the t1 part. + gfmat_prod(y, sk->t1, _V1_BYTE, _O1, x_o1); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(w, y, _V1_BYTE); + // Computing the t4 part. + gfmat_prod(y, sk->t4, _V1_BYTE, _O2, x_o2); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(w, y, _V1_BYTE); + // Computing the t3 part. + gfmat_prod(y, sk->t3, _O1_BYTE, _O2, x_o2); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(w + _V1_BYTE, y, _O1_BYTE); + + memset(signature, 0, _SIGNATURE_BYTE); // set the output 0 + // clean + memset(&prng_sign, 0, sizeof(prng_t)); + memset(vinegar, 0, _V1_BYTE); + memset(r_l1_F1, 0, _O1_BYTE); + memset(r_l2_F1, 0, _O2_BYTE); + memset(_z, 0, _PUB_M_BYTE); + memset(y, 0, _PUB_M_BYTE); + memset(x_o1, 0, _O1_BYTE); + memset(x_o2, 0, _O2_BYTE); + memset(temp_o, 0, sizeof(temp_o)); + + // return: copy w and salt to the signature. + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + return -1; + } + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(signature, w, _PUB_N_BYTE); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(signature + _PUB_N_BYTE, salt, _SALT_BYTE); + return 0; +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_verify(const uint8_t *digest, const uint8_t *signature, const pk_t *pk) { + unsigned char digest_ck[_PUB_M_BYTE]; + // public_map( digest_ck , pk , signature ); Evaluating the quadratic public polynomials. + batch_quad_trimat_eval(digest_ck, pk->pk, signature, _PUB_N, _PUB_M_BYTE); + + unsigned char correct[_PUB_M_BYTE]; + unsigned char digest_salt[_HASH_LEN + _SALT_BYTE]; + memcpy(digest_salt, digest, _HASH_LEN); + memcpy(digest_salt + _HASH_LEN, signature + _PUB_N_BYTE, _SALT_BYTE); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(correct, _PUB_M_BYTE, digest_salt, _HASH_LEN + _SALT_BYTE); // H( digest || salt ) + + // check consistancy. 
+ unsigned char cc = 0; + for (unsigned int i = 0; i < _PUB_M_BYTE; i++) { + cc |= (digest_ck[i] ^ correct[i]); + } + return (0 == cc) ? 0 : -1; +} + + diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow.h b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow.h new file mode 100644 index 00000000..7d72637d --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow.h @@ -0,0 +1,33 @@ +#ifndef _RAINBOW_H_ +#define _RAINBOW_H_ +/// @file rainbow.h +/// @brief APIs for rainbow. +/// + +#include "rainbow_config.h" +#include "rainbow_keypair.h" + +#include + +/// +/// @brief Signing function for classical secret key. +/// +/// @param[out] signature - the signature. +/// @param[in] sk - the secret key. +/// @param[in] digest - the digest. +/// +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_sign(uint8_t *signature, const sk_t *sk, const uint8_t *digest); + +/// +/// @brief Verifying function. +/// +/// @param[in] digest - the digest. +/// @param[in] signature - the signature. +/// @param[in] pk - the public key. +/// @return 0 for successful verified. -1 for failed verification. 
+/// +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_verify(const uint8_t *digest, const uint8_t *signature, const pk_t *pk); + + + +#endif // _RAINBOW_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_blas.h b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_blas.h new file mode 100644 index 00000000..c5c54829 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_blas.h @@ -0,0 +1,32 @@ +#ifndef _RAINBOW_BLAS_H_ +#define _RAINBOW_BLAS_H_ +/// @file rainbow_blas.h +/// @brief Defining the functions used in rainbow.c acconding to the definitions in rainbow_config.h +/// +/// Defining the functions used in rainbow.c acconding to the definitions in rainbow_config.h + +#include "blas.h" +#include "blas_comm.h" +#include "parallel_matrix_op.h" +#include "rainbow_config.h" + + +#define gfv_get_ele PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_get_ele +#define gfv_mul_scalar PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_mul_scalar +#define gfv_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_madd + +#define gfmat_prod PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_prod +#define gfmat_inv PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256mat_inv + +#define batch_trimat_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimat_madd_gf256 +#define batch_trimatTr_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_trimatTr_madd_gf256 +#define batch_2trimat_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_2trimat_madd_gf256 +#define batch_matTr_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_matTr_madd_gf256 +#define batch_bmatTr_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_bmatTr_madd_gf256 +#define batch_mat_madd PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_mat_madd_gf256 + +#define batch_quad_trimat_eval PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_trimat_eval_gf256 +#define batch_quad_recmat_eval PQCLEAN_RAINBOWIIICLASSIC_CLEAN_batch_quad_recmat_eval_gf256 + + +#endif // _RAINBOW_BLAS_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_config.h 
b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_config.h new file mode 100644 index 00000000..f3cbb700 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_config.h @@ -0,0 +1,48 @@ +#ifndef _H_RAINBOW_CONFIG_H_ +#define _H_RAINBOW_CONFIG_H_ + +/// @file rainbow_config.h +/// @brief Defining the parameters of the Rainbow and the corresponding constants. +/// + +#define _GFSIZE 256 +#define _V1 68 +#define _O1 32 +#define _O2 48 +#define _MAX_O 48 +#define _HASH_LEN 48 + + +#define _V2 ((_V1) + (_O1)) + +/// size of N, in # of gf elements. +#define _PUB_N (_V1 + _O1 + _O2) + +/// size of M, in # gf elements. +#define _PUB_M (_O1 + _O2) + +/// size of variables, in # bytes. + +// GF256 +#define _V1_BYTE (_V1) +#define _V2_BYTE (_V2) +#define _O1_BYTE (_O1) +#define _O2_BYTE (_O2) +#define _MAX_O_BYTE (_MAX_O) +#define _PUB_N_BYTE (_PUB_N) +#define _PUB_M_BYTE (_PUB_M) + + +/// length of seed for public key, in # bytes +#define LEN_PKSEED 32 + +/// length of seed for secret key, in # bytes +#define LEN_SKSEED 32 + +/// length of salt for a signature, in # bytes +#define _SALT_BYTE 16 + +/// length of a signature +#define _SIGNATURE_BYTE (_PUB_N_BYTE + _SALT_BYTE) + +#endif // _H_RAINBOW_CONFIG_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair.c b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair.c new file mode 100644 index 00000000..63708a9b --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair.c @@ -0,0 +1,155 @@ +/// @file rainbow_keypair.c +/// @brief implementations of functions in rainbow_keypair.h +/// + +#include "rainbow_keypair.h" +#include "blas.h" +#include "blas_comm.h" +#include "rainbow_blas.h" +#include "rainbow_keypair_computation.h" +#include "utils_prng.h" +#include +#include +#include + +static +void generate_S_T( unsigned char *s_and_t, prng_t *prng0 ) { + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->s1); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( 
prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t1); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t4); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t3); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); +} + + +static +unsigned generate_l1_F12( unsigned char *sk, prng_t *prng0 ) { + unsigned n_byte_generated = 0; + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->l1_F1); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l1_F2); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + n_byte_generated += size; + + return n_byte_generated; +} + + +static +unsigned generate_l2_F12356( unsigned char *sk, prng_t *prng0 ) { + unsigned n_byte_generated = 0; + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->l2_F1); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F2); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F3); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F5); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F6); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen( prng0, sk, size ); + n_byte_generated += size; + + return n_byte_generated; +} + + +static void generate_B1_B2(unsigned char *sk, prng_t *prng0) { + sk += generate_l1_F12(sk, prng0); + generate_l2_F12356(sk, prng0); +} + +static void calculate_t4(unsigned char *t2_to_t4, const unsigned char *t1, const unsigned char *t3) { + // t4 = T_sk.t1 * T_sk.t3 - T_sk.t2 + unsigned char temp[_V1_BYTE + 32]; + unsigned char *t4 = t2_to_t4; + for 
(unsigned int i = 0; i < _O2; i++) { /// t3 width + gfmat_prod(temp, t1, _V1_BYTE, _O1, t3); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(t4, temp, _V1_BYTE); + t4 += _V1_BYTE; + t3 += _O1_BYTE; + } +} + +static void obsfucate_l1_polys(unsigned char *l1_polys, const unsigned char *l2_polys, unsigned int n_terms, const unsigned char *s1) { + unsigned char temp[_O1_BYTE + 32]; + while (n_terms--) { + gfmat_prod(temp, s1, _O1_BYTE, _O2, l2_polys); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_gf256v_add(l1_polys, temp, _O1_BYTE); + l1_polys += _O1_BYTE; + l2_polys += _O2_BYTE; + } +} + +/////////////////// Classic ////////////////////////////////// + +static void generate_secretkey(sk_t *sk, const unsigned char *sk_seed) { + memcpy(sk->sk_seed, sk_seed, LEN_SKSEED); + + // set up prng + prng_t prng0; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_set(&prng0, sk_seed, LEN_SKSEED); + + // generating secret key with prng. + generate_S_T(sk->s1, &prng0); + generate_B1_B2(sk->l1_F1, &prng0); + + // clean prng + memset(&prng0, 0, sizeof(prng_t)); +} + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_generate_keypair(pk_t *rpk, sk_t *sk, const unsigned char *sk_seed) { + generate_secretkey(sk, sk_seed); + + // set up a temporary structure ext_cpk_t for calculating public key. + ext_cpk_t pk; + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_calculate_Q_from_F(&pk, sk, sk); // compute the public key in ext_cpk_t format. + calculate_t4(sk->t4, sk->t1, sk->t3); + + obsfucate_l1_polys(pk.l1_Q1, pk.l2_Q1, N_TRIANGLE_TERMS(_V1), sk->s1); + obsfucate_l1_polys(pk.l1_Q2, pk.l2_Q2, _V1 * _O1, sk->s1); + obsfucate_l1_polys(pk.l1_Q3, pk.l2_Q3, _V1 * _O2, sk->s1); + obsfucate_l1_polys(pk.l1_Q5, pk.l2_Q5, N_TRIANGLE_TERMS(_O1), sk->s1); + obsfucate_l1_polys(pk.l1_Q6, pk.l2_Q6, _O1 * _O2, sk->s1); + obsfucate_l1_polys(pk.l1_Q9, pk.l2_Q9, N_TRIANGLE_TERMS(_O2), sk->s1); + // so far, the pk contains the full pk but in ext_cpk_t format. 
+ + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_extcpk_to_pk(rpk, &pk); // convert the public key from ext_cpk_t to pk_t. +} + + + diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair.h b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair.h new file mode 100644 index 00000000..1812f885 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair.h @@ -0,0 +1,61 @@ +#ifndef _RAINBOW_KEYPAIR_H_ +#define _RAINBOW_KEYPAIR_H_ +/// @file rainbow_keypair.h +/// @brief Formats of key pairs and functions for generating key pairs. +/// Formats of key pairs and functions for generating key pairs. +/// + +#include "rainbow_config.h" + +#define N_TRIANGLE_TERMS(n_var) ((n_var) * ((n_var) + 1) / 2) + +/// @brief public key for classic rainbow +/// +/// public key for classic rainbow +/// +typedef struct rainbow_publickey { + unsigned char pk[(_PUB_M_BYTE)*N_TRIANGLE_TERMS(_PUB_N)]; +} pk_t; + +/// @brief secret key for classic rainbow +/// +/// secret key for classic rainbow +/// +typedef struct rainbow_secretkey { + /// + /// seed for generating secret key. + /// Generating S, T, and F for classic rainbow. + /// Generating S and T only for cyclic rainbow. 
+ unsigned char sk_seed[LEN_SKSEED]; + + unsigned char s1[_O1_BYTE * _O2]; ///< part of S map + unsigned char t1[_V1_BYTE * _O1]; ///< part of T map + unsigned char t4[_V1_BYTE * _O2]; ///< part of T map + unsigned char t3[_O1_BYTE * _O2]; ///< part of T map + + unsigned char l1_F1[_O1_BYTE * N_TRIANGLE_TERMS(_V1)]; ///< part of C-map, F1, Layer1 + unsigned char l1_F2[_O1_BYTE * _V1 * _O1]; ///< part of C-map, F2, Layer1 + + unsigned char l2_F1[_O2_BYTE * N_TRIANGLE_TERMS(_V1)]; ///< part of C-map, F1, Layer2 + unsigned char l2_F2[_O2_BYTE * _V1 * _O1]; ///< part of C-map, F2, Layer2 + + unsigned char l2_F3[_O2_BYTE * _V1 * _O2]; ///< part of C-map, F3, Layer2 + unsigned char l2_F5[_O2_BYTE * N_TRIANGLE_TERMS(_O1)]; ///< part of C-map, F5, Layer2 + unsigned char l2_F6[_O2_BYTE * _O1 * _O2]; ///< part of C-map, F6, Layer2 +} sk_t; + + +/// +/// @brief Generate key pairs for classic rainbow. +/// +/// @param[out] pk - the public key. +/// @param[out] sk - the secret key. +/// @param[in] sk_seed - seed for generating the secret key. 
+/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_generate_keypair(pk_t *pk, sk_t *sk, const unsigned char *sk_seed); + + + + + +#endif // _RAINBOW_KEYPAIR_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair_computation.c b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair_computation.c new file mode 100644 index 00000000..62367d44 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair_computation.c @@ -0,0 +1,189 @@ +/// @file rainbow_keypair_computation.c +/// @brief Implementations for functions in rainbow_keypair_computation.h +/// + +#include "rainbow_keypair_computation.h" +#include "blas.h" +#include "blas_comm.h" +#include "rainbow_blas.h" +#include "rainbow_keypair.h" +#include +#include +#include + +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk) { + const unsigned char *idx_l1 = cpk->l1_Q1; + const unsigned char *idx_l2 = cpk->l2_Q1; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = i; j < _V1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q2; + idx_l2 = cpk->l2_Q2; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = _V1; j < _V1 + _O1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q3; + idx_l2 = cpk->l2_Q3; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, 
_O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q5; + idx_l2 = cpk->l2_Q5; + for (unsigned int i = _V1; i < _V1 + _O1; i++) { + for (unsigned int j = i; j < _V1 + _O1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q6; + idx_l2 = cpk->l2_Q6; + for (unsigned int i = _V1; i < _V1 + _O1; i++) { + for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q9; + idx_l2 = cpk->l2_Q9; + for (unsigned int i = _V1 + _O1; i < _PUB_N; i++) { + for (unsigned int j = i; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } +} + +static void calculate_Q_from_F_ref(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) { + /* + Layer 1 + Computing : + Q_pk.l1_F1s[i] = F_sk.l1_F1s[i] + + Q_pk.l1_F2s[i] = (F1* T1 + F2) + F1tr * t1 + Q_pk.l1_F5s[i] = UT( T1tr* (F1 * T1 + F2) ) + */ + const unsigned char *t2 = Ts->t4; + + memcpy(Qs->l1_Q1, Fs->l1_F1, _O1_BYTE * N_TRIANGLE_TERMS(_V1)); + + memcpy(Qs->l1_Q2, Fs->l1_F2, _O1_BYTE * _V1 * _O1); + batch_trimat_madd(Qs->l1_Q2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); // F1*T1 + F2 + + memset(Qs->l1_Q3, 0, _O1_BYTE * _V1 * _O2); + memset(Qs->l1_Q5, 0, _O1_BYTE * 
N_TRIANGLE_TERMS(_O1)); + memset(Qs->l1_Q6, 0, _O1_BYTE * _O1 * _O2); + memset(Qs->l1_Q9, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O2)); + + // l1_Q5 : _O1_BYTE * _O1 * _O1 + // l1_Q9 : _O1_BYTE * _O2 * _O2 + // l2_Q5 : _O2_BYTE * _V1 * _O1 + // l2_Q9 : _O2_BYTE * _V1 * _O2 + + unsigned char tempQ[_MAX_O_BYTE * _MAX_O * _MAX_O + 32]; + + memset(tempQ, 0, _O1_BYTE * _O1 * _O1); // l1_Q5 + batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q2, _O1, _O1_BYTE); // t1_tr*(F1*T1 + F2) + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_UpperTrianglize(Qs->l1_Q5, tempQ, _O1, _O1_BYTE); // UT( ... ) // Q5 + + batch_trimatTr_madd(Qs->l1_Q2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); // Q2 + /* + Computing: + F1_T2 = F1 * t2 + F2_T3 = F2 * t3 + F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2 + Q_pk.l1_F3s[i] = F1_F1T_T2 + F2_T3 + Q_pk.l1_F6s[i] = T1tr* ( F1_F1T_T2 + F2_T3 ) + F2tr * t2 + Q_pk.l1_F9s[i] = UT( T2tr* ( F1_T2 + F2_T3 ) ) + */ + batch_trimat_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1*T2 + batch_mat_madd(Qs->l1_Q3, Fs->l1_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O1_BYTE); // F1_T2 + F2_T3 + + memset(tempQ, 0, _O1_BYTE * _O2 * _O2); // l1_Q9 + batch_matTr_madd(tempQ, t2, _V1, _V1_BYTE, _O2, Qs->l1_Q3, _O2, _O1_BYTE); // T2tr * ( F1_T2 + F2_T3 ) + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_UpperTrianglize(Qs->l1_Q9, tempQ, _O2, _O1_BYTE); // Q9 + + batch_trimatTr_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1_F1T_T2 + F2_T3 // Q3 + + batch_bmatTr_madd(Qs->l1_Q6, Fs->l1_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F2tr*T2 + batch_matTr_madd(Qs->l1_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q3, _O2, _O1_BYTE); // Q6 + + /* + layer 2 + Computing: + Q1 = F1 + Q2 = F1_F1T*T1 + F2 + Q5 = UT( T1tr( F1*T1 + F2 ) + F5 ) + */ + memcpy(Qs->l2_Q1, Fs->l2_F1, _O2_BYTE * N_TRIANGLE_TERMS(_V1)); + + memcpy(Qs->l2_Q2, Fs->l2_F2, _O2_BYTE * _V1 * _O1); + batch_trimat_madd(Qs->l2_Q2, Fs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // F1*T1 + F2 + + memcpy(Qs->l2_Q5, 
Fs->l2_F5, _O2_BYTE * N_TRIANGLE_TERMS(_O1)); + memset(tempQ, 0, _O2_BYTE * _O1 * _O1); // l2_Q5 + batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l2_Q2, _O1, _O2_BYTE); // t1_tr*(F1*T1 + F2) + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_UpperTrianglize(Qs->l2_Q5, tempQ, _O1, _O2_BYTE); // UT( ... ) // Q5 + + batch_trimatTr_madd(Qs->l2_Q2, Fs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // Q2 + + /* + Computing: + F1_T2 = F1 * t2 + F2_T3 = F2 * t3 + F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2 + + Q3 = F1_F1T*T2 + F2*T3 + F3 + Q9 = UT( T2tr*( F1*T2 + F2*T3 + F3 ) + T3tr*( F5*T3 + F6 ) ) + Q6 = T1tr*( F1_F1T*T2 + F2*T3 + F3 ) + F2Tr*T2 + F5_F5T*T3 + F6 + */ + memcpy(Qs->l2_Q3, Fs->l2_F3, _O2_BYTE * _V1 * _O2); + batch_trimat_madd(Qs->l2_Q3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1*T2 + F3 + batch_mat_madd(Qs->l2_Q3, Fs->l2_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F1_T2 + F2_T3 + F3 + + memset(tempQ, 0, _O2_BYTE * _O2 * _O2); // l2_Q9 + batch_matTr_madd(tempQ, t2, _V1, _V1_BYTE, _O2, Qs->l2_Q3, _O2, _O2_BYTE); // T2tr * ( ..... ) + + memcpy(Qs->l2_Q6, Fs->l2_F6, _O2_BYTE * _O1 * _O2); + + batch_trimat_madd(Qs->l2_Q6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6 + batch_matTr_madd(tempQ, Ts->t3, _O1, _O1_BYTE, _O2, Qs->l2_Q6, _O2, _O2_BYTE); // T2tr*( ..... ) + T3tr*( ..... 
) + memset(Qs->l2_Q9, 0, _O2_BYTE * N_TRIANGLE_TERMS(_O2)); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_UpperTrianglize(Qs->l2_Q9, tempQ, _O2, _O2_BYTE); // Q9 + + batch_trimatTr_madd(Qs->l2_Q3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1_F1T_T2 + F2_T3 + F3 // Q3 + + batch_bmatTr_madd(Qs->l2_Q6, Fs->l2_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6 + F2tr*T2 + batch_trimatTr_madd(Qs->l2_Q6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F2tr*T2 + F5_F5T*T3 + F6 + batch_matTr_madd(Qs->l2_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l2_Q3, _O2, _O2_BYTE); // Q6 +} +#define calculate_Q_from_F_impl calculate_Q_from_F_ref +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_calculate_Q_from_F(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) { + calculate_Q_from_F_impl(Qs, Fs, Ts); +} diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair_computation.h b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair_computation.h new file mode 100644 index 00000000..c9add890 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/rainbow_keypair_computation.h @@ -0,0 +1,53 @@ +#ifndef _RAINBOW_KEYPAIR_COMP_H_ +#define _RAINBOW_KEYPAIR_COMP_H_ +/// @file rainbow_keypair_computation.h +/// @brief Functions for calculating pk/sk while generating keys. +/// +/// Defining an internal structure of public key. +/// Functions for calculating pk/sk for key generation. +/// + +#include "rainbow_keypair.h" + +/// @brief The (internal use) public key for rainbow +/// +/// The (internal use) public key for rainbow. The public +/// polynomials are divided into l1_Q1, l1_Q2, ... l1_Q9, +/// l2_Q1, .... , l2_Q9. 
+/// +typedef struct rainbow_extend_publickey { + unsigned char l1_Q1[_O1_BYTE * N_TRIANGLE_TERMS(_V1)]; + unsigned char l1_Q2[_O1_BYTE * _V1 * _O1]; + unsigned char l1_Q3[_O1_BYTE * _V1 * _O2]; + unsigned char l1_Q5[_O1_BYTE * N_TRIANGLE_TERMS(_O1)]; + unsigned char l1_Q6[_O1_BYTE * _O1 * _O2]; + unsigned char l1_Q9[_O1_BYTE * N_TRIANGLE_TERMS(_O2)]; + + unsigned char l2_Q1[_O2_BYTE * N_TRIANGLE_TERMS(_V1)]; + unsigned char l2_Q2[_O2_BYTE * _V1 * _O1]; + unsigned char l2_Q3[_O2_BYTE * _V1 * _O2]; + unsigned char l2_Q5[_O2_BYTE * N_TRIANGLE_TERMS(_O1)]; + unsigned char l2_Q6[_O2_BYTE * _O1 * _O2]; + unsigned char l2_Q9[_O2_BYTE * N_TRIANGLE_TERMS(_O2)]; +} ext_cpk_t; + +/// +/// @brief converting formats of public keys : from ext_cpk_t version to pk_t +/// +/// @param[out] pk - the classic public key. +/// @param[in] cpk - the internel public key. +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk); +///////////////////////////////////////////////// + +/// +/// @brief Computing public key from secret key +/// +/// @param[out] Qs - the public key +/// @param[in] Fs - parts of the secret key: l1_F1, l1_F2, l2_F1, l2_F2, l2_F3, l2_F5, l2_F6 +/// @param[in] Ts - parts of the secret key: T1, T4, T3 +/// +void PQCLEAN_RAINBOWIIICLASSIC_CLEAN_calculate_Q_from_F(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts); + + +#endif // _RAINBOW_KEYPAIR_COMP_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/sign.c b/crypto_sign/rainbow/rainbowIII-classic/clean/sign.c new file mode 100644 index 00000000..81e3b0f2 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/sign.c @@ -0,0 +1,74 @@ +/// @file sign.c +/// @brief the implementations for functions in api.h +/// +/// + +#include "api.h" +#include "rainbow.h" +#include "rainbow_config.h" +#include "rainbow_keypair.h" +#include "randombytes.h" +#include "utils_hash.h" +#include +#include + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char 
*sk) { + unsigned char sk_seed[LEN_SKSEED] = {0}; + randombytes(sk_seed, LEN_SKSEED); + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_generate_keypair((pk_t *)pk, (sk_t *)sk, sk_seed); + return 0; +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + unsigned char digest[_HASH_LEN]; + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + + memmove(sm, m, mlen); // sm may overlap m when signing in place; memcpy would be undefined there + smlen[0] = mlen + _SIGNATURE_BYTE; + + return PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_sign(sm + mlen, (const sk_t *)sk, digest); +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { + int rc; + if (_SIGNATURE_BYTE > smlen) { + rc = -1; + } else { + *mlen = smlen - _SIGNATURE_BYTE; + + unsigned char digest[_HASH_LEN]; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, sm, *mlen); + + rc = PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_verify(digest, sm + mlen[0], (const pk_t *)pk); + } + if (!rc) { + memmove(m, sm, smlen - _SIGNATURE_BYTE); + } else { // bad signature + *mlen = (size_t) -1; + memset(m, 0, smlen); + } + return rc; +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned char digest[_HASH_LEN]; + + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + *siglen = _SIGNATURE_BYTE; + return PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_sign(sig, (const sk_t *)sk, digest); +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen != _SIGNATURE_BYTE) { + return -1; + } + unsigned char digest[_HASH_LEN]; + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + return PQCLEAN_RAINBOWIIICLASSIC_CLEAN_rainbow_verify(digest, sig, (const pk_t *)pk); +}
diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/utils_hash.c b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_hash.c new file mode 100644 index 00000000..a00c94e5 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_hash.c @@ -0,0 +1,50 @@ +/// @file utils_hash.c +/// @brief the adapter for SHA2 families. +/// +/// + +#include "utils_hash.h" +#include "rainbow_config.h" +#include "sha2.h" + +static inline int h(unsigned char *digest, const unsigned char *m, size_t mlen) { + sha384(digest, m, mlen); + return 0; +} + +static inline int expand_hash(unsigned char *digest, size_t n_digest, const unsigned char *hash) { + if (_HASH_LEN >= n_digest) { + for (size_t i = 0; i < n_digest; i++) { + digest[i] = hash[i]; + } + return 0; + } + for (size_t i = 0; i < _HASH_LEN; i++) { + digest[i] = hash[i]; + } + n_digest -= _HASH_LEN; + + while (_HASH_LEN <= n_digest) { + h(digest + _HASH_LEN, digest, _HASH_LEN); + + n_digest -= _HASH_LEN; + digest += _HASH_LEN; + } + unsigned char temp[_HASH_LEN]; + if (n_digest) { + h(temp, digest, _HASH_LEN); + for (size_t i = 0; i < n_digest; i++) { + digest[_HASH_LEN + i] = temp[i]; + } + } + return 0; +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(unsigned char *digest, + size_t len_digest, + const unsigned char *m, + size_t mlen) { + unsigned char buf[_HASH_LEN]; + h(buf, m, mlen); + return expand_hash(digest, len_digest, buf); +} diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/utils_hash.h b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_hash.h new file mode 100644 index 00000000..a36aa481 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_hash.h @@ -0,0 +1,11 @@ +#ifndef _UTILS_HASH_H_ +#define _UTILS_HASH_H_ +/// @file utils_hash.h +/// @brief the interface for adapting hash functions. 
+/// + +#include + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(unsigned char *digest, size_t len_digest, const unsigned char *m, size_t mlen); + +#endif // _UTILS_HASH_H_ diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/utils_prng.c b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_prng.c new file mode 100644 index 00000000..2bfa1b1a --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_prng.c @@ -0,0 +1,97 @@ +/// @file utils_prng.c +/// @brief The implementation of PRNG related functions. +/// + +#include "utils_prng.h" +#include "aes.h" +#include "randombytes.h" +#include "utils_hash.h" +#include +#include + +static void prng_update(const unsigned char *provided_data, + unsigned char *Key, + unsigned char *V) { + unsigned char temp[48]; + aes256ctx ctx; + aes256_ecb_keyexp(&ctx, Key); + for (int i = 0; i < 3; i++) { + //increment V + for (int j = 15; j >= 0; j--) { + if (V[j] == 0xff) { + V[j] = 0x00; + } else { + V[j]++; + break; + } + } + aes256_ecb(temp + 16 * i, V, 1, &ctx); + } + if (provided_data != NULL) { + for (int i = 0; i < 48; i++) { + temp[i] ^= provided_data[i]; + } + } + aes256_ctx_release(&ctx); + memcpy(Key, temp, 32); + memcpy(V, temp + 32, 16); +} +static void randombytes_init_with_state(prng_t *state, + unsigned char *entropy_input_48bytes) { + memset(state->Key, 0x00, 32); + memset(state->V, 0x00, 16); + prng_update(entropy_input_48bytes, state->Key, state->V); +} + +static int randombytes_with_state(prng_t *state, + unsigned char *x, + size_t xlen) { + + unsigned char block[16]; + int i = 0; + + aes256ctx ctx; + aes256_ecb_keyexp(&ctx, state->Key); + + while (xlen > 0) { + //increment V + for (int j = 15; j >= 0; j--) { + if (state->V[j] == 0xff) { + state->V[j] = 0x00; + } else { + state->V[j]++; + break; + } + } + aes256_ecb(block, state->V, 1, &ctx); + if (xlen > 15) { + memcpy(x + i, block, 16); + i += 16; + xlen -= 16; + } else { + memcpy(x + i, block, xlen); + xlen = 0; + } + } + 
aes256_ctx_release(&ctx); + prng_update(NULL, state->Key, state->V); + return 0; +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_set(prng_t *ctx, const void *prng_seed, unsigned long prng_seedlen) { + unsigned char seed[48]; + if (prng_seedlen >= 48) { + memcpy(seed, prng_seed, 48); + } else { + memcpy(seed, prng_seed, prng_seedlen); + PQCLEAN_RAINBOWIIICLASSIC_CLEAN_hash_msg(seed + prng_seedlen, 48 - (unsigned)prng_seedlen, (const unsigned char *)prng_seed, prng_seedlen); + } + + randombytes_init_with_state(ctx, seed); + + return 0; +} + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen(prng_t *ctx, unsigned char *out, unsigned long outlen) { + return randombytes_with_state(ctx, out, outlen); +} diff --git a/crypto_sign/rainbow/rainbowIII-classic/clean/utils_prng.h b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_prng.h new file mode 100644 index 00000000..40214f42 --- /dev/null +++ b/crypto_sign/rainbow/rainbowIII-classic/clean/utils_prng.h @@ -0,0 +1,18 @@ +#ifndef _UTILS_PRNG_H_ +#define _UTILS_PRNG_H_ +/// @file utils_prng.h +/// @brief the interface for adapting PRNG functions. 
+/// +/// + +#include "randombytes.h" + +typedef struct { + unsigned char Key[32]; + unsigned char V[16]; +} prng_t; + +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_set(prng_t *ctx, const void *prng_seed, unsigned long prng_seedlen); +int PQCLEAN_RAINBOWIIICLASSIC_CLEAN_prng_gen(prng_t *ctx, unsigned char *out, unsigned long outlen); + +#endif // _UTILS_PRNG_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/META.yml b/crypto_sign/rainbow/rainbowV-classic/META.yml new file mode 100644 index 00000000..10e524f8 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/META.yml @@ -0,0 +1,20 @@ +name: "RAINBOW(256,96,36,64) - classic" +type: signature +claimed-nist-level: 5 +length-public-key: 1930600 +length-secret-key: 1408736 +length-signature: 212 +nistkat-sha256: 3b13607b2761cd4b5ccf3206d3fec04f2568e0b01d6f370a336d0de7c70051ff +testvectors-sha256: 079f2020b30dc69ca95499e3cf4bb5eaa70fae40be63fa0d3e542b504c8acd6c +principal-submitters: + - Jintai Ding +auxiliary-submitters: + - Ming-Shing Chen + - Matthias Kannwischer + - Jacques Patarin + - Albrecht Petzoldt + - Dieter Schmidt + - Bo-Yin Yang +implementations: + - name: clean + version: https://github.com/fast-crypto-lab/rainbow-submission-round2/commit/173ada0e077e1b9dbd8e4a78994f87acc0c92263 diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/LICENSE b/crypto_sign/rainbow/rainbowV-classic/clean/LICENSE new file mode 100644 index 00000000..cb00a6e3 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/LICENSE @@ -0,0 +1,8 @@ +`Software implementation of Rainbow for NIST R2 submission' by Ming-Shing Chen + +To the extent possible under law, the person who associated CC0 with +`Software implementation of Rainbow for NIST R2 submission' has waived all copyright and related or neighboring rights +to `Software implementation of Rainbow for NIST R2 submission'. + +You should have received a copy of the CC0 legalcode along with this +work. If not, see . 
diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/Makefile.Microsoft_nmake b/crypto_sign/rainbow/rainbowV-classic/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..e828be35 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=librainbowV-classic_clean.lib +OBJECTS = blas_comm.obj parallel_matrix_op.obj rainbow.obj rainbow_keypair.obj rainbow_keypair_computation.obj sign.obj utils_hash.obj utils_prng.obj blas.obj gf.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/api.h b/crypto_sign/rainbow/rainbowV-classic/clean/api.h new file mode 100644 index 00000000..b26496e1 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/api.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_RAINBOWVCLASSIC_CLEAN_API_H +#define PQCLEAN_RAINBOWVCLASSIC_CLEAN_API_H + +#include +#include + +#define PQCLEAN_RAINBOWVCLASSIC_CLEAN_CRYPTO_SECRETKEYBYTES 1408736 +#define PQCLEAN_RAINBOWVCLASSIC_CLEAN_CRYPTO_PUBLICKEYBYTES 1930600 +#define PQCLEAN_RAINBOWVCLASSIC_CLEAN_CRYPTO_BYTES 212 +#define PQCLEAN_RAINBOWVCLASSIC_CLEAN_CRYPTO_ALGNAME "RAINBOW(256,96,36,64) - classic" + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); + + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, + const uint8_t *m, 
size_t mlen, + const uint8_t *sk); + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *pk); + + +#endif diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/blas.c b/crypto_sign/rainbow/rainbowV-classic/clean/blas.c new file mode 100644 index 00000000..4a0b7d3e --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/blas.c @@ -0,0 +1,31 @@ +#include "blas.h" +#include "gf.h" + +#include + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_predicated_add(uint8_t *accu_b, uint8_t predicate, const uint8_t *a, size_t _num_byte) { + uint8_t pr_u8 = (uint8_t) ((uint8_t) 0 - predicate); + for (size_t i = 0; i < _num_byte; i++) { + accu_b[i] ^= (a[i] & pr_u8); + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(uint8_t *accu_b, const uint8_t *a, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + accu_b[i] ^= a[i]; + } +} + + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_mul_scalar(uint8_t *a, uint8_t b, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + a[i] = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(a[i], b); + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(uint8_t *accu_c, const uint8_t *a, uint8_t gf256_b, size_t _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + accu_c[i] ^= PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(a[i], gf256_b); + } +} + diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/blas.h b/crypto_sign/rainbow/rainbowV-classic/clean/blas.h new file mode 100644 index 00000000..78dc991a --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/blas.h @@ -0,0 +1,19 @@ +#ifndef _BLAS_H_ +#define _BLAS_H_ +/// @file blas.h +/// @brief Functions for implementing basic linear algebra functions. 
+/// + +#include "rainbow_config.h" +#include +#include + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_predicated_add(uint8_t *accu_b, uint8_t predicate, const uint8_t *a, size_t _num_byte); +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(uint8_t *accu_b, const uint8_t *a, size_t _num_byte); + + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_mul_scalar(uint8_t *a, uint8_t b, size_t _num_byte); +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(uint8_t *accu_c, const uint8_t *a, uint8_t gf256_b, size_t _num_byte); + + +#endif // _BLAS_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/blas_comm.c b/crypto_sign/rainbow/rainbowV-classic/clean/blas_comm.c new file mode 100644 index 00000000..9cb4979a --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/blas_comm.c @@ -0,0 +1,144 @@ +/// @file blas_comm.c +/// @brief The standard implementations for blas_comm.h +/// + +#include "blas_comm.h" +#include "blas.h" +#include "gf.h" +#include "rainbow_config.h" + +#include +#include + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(uint8_t *b, unsigned int _num_byte) { + for (size_t i = 0; i < _num_byte; i++) { + b[i] = 0; + } +} +/// @brief get an element from GF(256) vector . +/// +/// @param[in] a - the input vector a. +/// @param[in] i - the index in the vector a. +/// @return the value of the element. 
+/// +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(const uint8_t *a, unsigned int i) { + return a[i]; +} + +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_is_zero(const uint8_t *a, unsigned int _num_byte) { + uint8_t r = 0; + while (_num_byte--) { + r |= a[0]; + a++; + } + return (0 == r); +} + +/// polynomial multiplication +/// Schoolbook +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_polymul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int _num) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(c, _num ? _num * 2 - 1 : 0); // _num == 0: nothing to clear (avoids unsigned wrap-around) + for (unsigned int i = 0; i < _num; i++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(c + i, a, b[i], _num); + } +} + +static void gf256mat_prod_ref(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(c, n_A_vec_byte); + for (unsigned int i = 0; i < n_A_width; i++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(c, matA, b[i], n_A_vec_byte); + matA += n_A_vec_byte; + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_mul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int len_vec) { + unsigned int n_vec_byte = len_vec; + for (unsigned int k = 0; k < len_vec; k++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(c, n_vec_byte); + const uint8_t *bk = b + n_vec_byte * k; + for (unsigned int i = 0; i < len_vec; i++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(c, a + n_vec_byte * i, bk[i], n_vec_byte); + } + c += n_vec_byte; + } +} + +static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsigned int w) { + unsigned int r8 = 1; + + for (unsigned int i = 0; i < h; i++) { + uint8_t *ai = mat + w * i; + unsigned int skip_len_align4 = i & ((unsigned int)~0x3); + + for (unsigned int j = i + 1; j < h; j++) { + uint8_t *aj = mat + w * j; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4); +
} + r8 &= PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_is_nonzero(ai[i]); + uint8_t pivot = ai[i]; + pivot = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_inv(pivot); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_mul_scalar(ai + skip_len_align4, pivot, w - skip_len_align4); + for (unsigned int j = 0; j < h; j++) { + if (i == j) { + continue; + } + uint8_t *aj = mat + w * j; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(aj + skip_len_align4, ai + skip_len_align4, aj[i], w - skip_len_align4); + } + } + + return r8; +} + +static unsigned int gf256mat_solve_linear_eq_ref(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n) { + uint8_t mat[64 * 65]; // augmented matrix: n rows of n+1 bytes each, sized for n <= 64 (64 * 64 overflowed at n == 64) + for (unsigned int i = 0; i < n; i++) { + memcpy(mat + i * (n + 1), inp_mat + i * n, n); + mat[i * (n + 1) + n] = c_terms[i]; + } + unsigned int r8 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_gauss_elim(mat, n, n + 1); + for (unsigned int i = 0; i < n; i++) { + sol[i] = mat[i * (n + 1) + n]; + } + return r8; +} + +static inline void gf256mat_submat(uint8_t *mat2, unsigned int w2, unsigned int st, const uint8_t *mat, unsigned int w, unsigned int h) { + for (unsigned int i = 0; i < h; i++) { + for (unsigned int j = 0; j < w2; j++) { + mat2[i * w2 + j] = mat[i * w + st + j]; + } + } +} + +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_inv(uint8_t *inv_a, const uint8_t *a, unsigned int H, uint8_t *buffer) { + uint8_t *aa = buffer; + for (unsigned int i = 0; i < H; i++) { + uint8_t *ai = aa + i * 2 * H; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(ai, 2 * H); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(ai, a + i * H, H); + ai[H + i] = 1; + } + unsigned int r8 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_gauss_elim(aa, H, 2 * H); + gf256mat_submat(inv_a, H, H, aa, 2 * H, H); + return r8; +} + + +// choosing the implementations depends on the macros _BLAS_AVX2_ and _BLAS_SSE + +#define gf256mat_prod_impl gf256mat_prod_ref +#define gf256mat_gauss_elim_impl gf256mat_gauss_elim_ref +#define gf256mat_solve_linear_eq_impl
gf256mat_solve_linear_eq_ref +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_prod(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b) { + gf256mat_prod_impl(c, matA, n_A_vec_byte, n_A_width, b); +} + +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_gauss_elim(uint8_t *mat, unsigned int h, unsigned int w) { + return gf256mat_gauss_elim_impl(mat, h, w); +} + +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_solve_linear_eq(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n) { + return gf256mat_solve_linear_eq_impl(sol, inp_mat, c_terms, n); +} + diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/blas_comm.h b/crypto_sign/rainbow/rainbowV-classic/clean/blas_comm.h new file mode 100644 index 00000000..0db6bb28 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/blas_comm.h @@ -0,0 +1,90 @@ +#ifndef _BLAS_COMM_H_ +#define _BLAS_COMM_H_ +/// @file blas_comm.h +/// @brief Common functions for linear algebra. +/// + +#include "rainbow_config.h" +#include + +/// @brief set a vector to 0. +/// +/// @param[in,out] b - the vector b. +/// @param[in] _num_byte - number of bytes for the vector b. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(uint8_t *b, unsigned int _num_byte); + +/// @brief get an element from GF(256) vector . +/// +/// @param[in] a - the input vector a. +/// @param[in] i - the index in the vector a. +/// @return the value of the element. +/// +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(const uint8_t *a, unsigned int i); + +/// @brief check if a vector is 0. +/// +/// @param[in] a - the vector a. +/// @param[in] _num_byte - number of bytes for the vector a. +/// @return 1(true) if a is 0. 0(false) else. +/// +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_is_zero(const uint8_t *a, unsigned int _num_byte); + +/// @brief polynomial multiplication: c = a*b +/// +/// @param[out] c - the output polynomial c +/// @param[in] a - the vector a. 
+/// @param[in] b - the vector b. +/// @param[in] _num - number of elements for the polynomials a and b. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_polymul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int _num); + +/// @brief matrix-vector multiplication: c = matA * b , in GF(256) +/// +/// @param[out] c - the output vector c +/// @param[in] matA - a column-major matrix A. +/// @param[in] n_A_vec_byte - the size of column vectors in bytes. +/// @param[in] n_A_width - the width of matrix A. +/// @param[in] b - the vector b. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_prod(uint8_t *c, const uint8_t *matA, unsigned int n_A_vec_byte, unsigned int n_A_width, const uint8_t *b); + +/// @brief matrix-matrix multiplication: c = a * b , in GF(256) +/// +/// @param[out] c - the output matrix c +/// @param[in] a - a matrix a. +/// @param[in] b - a matrix b. +/// @param[in] len_vec - the length of column vectors. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_mul(uint8_t *c, const uint8_t *a, const uint8_t *b, unsigned int len_vec); + +/// @brief Gauss elimination for a matrix, in GF(256) +/// +/// @param[in,out] mat - the matrix. +/// @param[in] h - the height of the matrix. +/// @param[in] w - the width of the matrix. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_gauss_elim(uint8_t *mat, unsigned int h, unsigned int w); + +/// @brief Solving linear equations, in GF(256) +/// +/// @param[out] sol - the solutions. +/// @param[in] inp_mat - the matrix parts of input equations. +/// @param[in] c_terms - the constant terms of the input equations. +/// @param[in] n - the number of equations. +/// @return 1(true) if success. 0(false) if the matrix is singular.
+/// +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_solve_linear_eq(uint8_t *sol, const uint8_t *inp_mat, const uint8_t *c_terms, unsigned int n); + +/// @brief Computing the inverse matrix, in GF(256) +/// +/// @param[out] inv_a - the output of matrix a. +/// @param[in] a - a matrix a. +/// @param[in] H - height of matrix a, i.e., matrix a is an HxH matrix. +/// @param[in] buffer - The buffer for computations. it has to be as large as 2 input matrixes. +/// @return 1(true) if success. 0(false) if the matrix is singular. +/// +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_inv(uint8_t *inv_a, const uint8_t *a, unsigned int H, uint8_t *buffer); + +#endif // _BLAS_COMM_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/gf.c b/crypto_sign/rainbow/rainbowV-classic/clean/gf.c new file mode 100644 index 00000000..f1abbc4e --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/gf.c @@ -0,0 +1,91 @@ +#include "gf.h" + +//// gf4 := gf2[x]/x^2+x+1 +static inline uint8_t gf4_mul_2(uint8_t a) { + uint8_t r = (uint8_t)(a << 1); + r ^= (uint8_t)((a >> 1) * 7); + return r; +} + +static inline uint8_t gf4_mul(uint8_t a, uint8_t b) { + uint8_t r = (uint8_t)(a * (b & 1)); + return r ^ (uint8_t)(gf4_mul_2(a) * (b >> 1)); +} + +static inline uint8_t gf4_squ(uint8_t a) { + return a ^ (a >> 1); +} + +//// gf16 := gf4[y]/y^2+y+x +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf16_mul(uint8_t a, uint8_t b) { + uint8_t a0 = a & 3; + uint8_t a1 = (a >> 2); + uint8_t b0 = b & 3; + uint8_t b1 = (b >> 2); + uint8_t a0b0 = gf4_mul(a0, b0); + uint8_t a1b1 = gf4_mul(a1, b1); + uint8_t a0b1_a1b0 = gf4_mul(a0 ^ a1, b0 ^ b1) ^ a0b0 ^ a1b1; + uint8_t a1b1_x2 = gf4_mul_2(a1b1); + return (uint8_t)((a0b1_a1b0 ^ a1b1) << 2 ^ a0b0 ^ a1b1_x2); +} + +static inline uint8_t gf16_squ(uint8_t a) { + uint8_t a0 = a & 3; + uint8_t a1 = (a >> 2); + a1 = gf4_squ(a1); + uint8_t a1squ_x2 = gf4_mul_2(a1); + return (uint8_t)((a1 << 2) ^ a1squ_x2 ^ gf4_squ(a0)); +} + +uint8_t 
PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_is_nonzero(uint8_t a) { + unsigned int a8 = a; + unsigned int r = ((unsigned int)0) - a8; + r >>= 8; + return r & 1; +} + +static inline uint8_t gf4_mul_3(uint8_t a) { + uint8_t msk = (uint8_t)((a - 2) >> 1); + return (uint8_t)((msk & ((int)a * 3)) | ((~msk) & ((int)a - 1))); +} +static inline uint8_t gf16_mul_8(uint8_t a) { + uint8_t a0 = a & 3; + uint8_t a1 = a >> 2; + return (uint8_t)((gf4_mul_2(a0 ^ a1) << 2) | gf4_mul_3(a1)); +} + +// gf256 := gf16[X]/X^2+X+xy +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(uint8_t a, uint8_t b) { + uint8_t a0 = a & 15; + uint8_t a1 = (a >> 4); + uint8_t b0 = b & 15; + uint8_t b1 = (b >> 4); + uint8_t a0b0 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf16_mul(a0, b0); + uint8_t a1b1 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf16_mul(a1, b1); + uint8_t a0b1_a1b0 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf16_mul(a0 ^ a1, b0 ^ b1) ^ a0b0 ^ a1b1; + uint8_t a1b1_x8 = gf16_mul_8(a1b1); + return (uint8_t)((a0b1_a1b0 ^ a1b1) << 4 ^ a0b0 ^ a1b1_x8); +} + +static inline uint8_t gf256_squ(uint8_t a) { + uint8_t a0 = a & 15; + uint8_t a1 = (a >> 4); + a1 = gf16_squ(a1); + uint8_t a1squ_x8 = gf16_mul_8(a1); + return (uint8_t)((a1 << 4) ^ a1squ_x8 ^ gf16_squ(a0)); +} + +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_inv(uint8_t a) { + // 128+64+32+16+8+4+2 = 254 + uint8_t a2 = gf256_squ(a); + uint8_t a4 = gf256_squ(a2); + uint8_t a8 = gf256_squ(a4); + uint8_t a4_2 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(a4, a2); + uint8_t a8_4_2 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(a4_2, a8); + uint8_t a64_ = gf256_squ(a8_4_2); + a64_ = gf256_squ(a64_); + a64_ = gf256_squ(a64_); + uint8_t a64_2 = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(a64_, a8_4_2); + uint8_t a128_ = gf256_squ(a64_2); + return PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(a2, a128_); +} diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/gf.h b/crypto_sign/rainbow/rainbowV-classic/clean/gf.h new file mode 100644 index 00000000..8fa48d2e --- /dev/null +++ 
b/crypto_sign/rainbow/rainbowV-classic/clean/gf.h @@ -0,0 +1,19 @@ +#ifndef _GF16_H_ +#define _GF16_H_ + +#include "rainbow_config.h" +#include + +/// @file gf.h +/// @brief Library for arithmetic in GF(16) and GF(256) +/// + +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf16_mul(uint8_t a, uint8_t b); + + +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_is_nonzero(uint8_t a); +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_inv(uint8_t a); +uint8_t PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256_mul(uint8_t a, uint8_t b); + + +#endif // _GF16_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/parallel_matrix_op.c b/crypto_sign/rainbow/rainbowV-classic/clean/parallel_matrix_op.c new file mode 100644 index 00000000..44713eaf --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/parallel_matrix_op.c @@ -0,0 +1,183 @@ +/// @file parallel_matrix_op.c +/// @brief the standard implementations for functions in parallel_matrix_op.h +/// +/// the standard implementations for functions in parallel_matrix_op.h +/// + +#include "parallel_matrix_op.h" +#include "blas.h" +#include "blas_comm.h" + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle(UT) matrix. +/// +/// @param[in] i_row - the i-th row in an upper-triangle matrix. +/// @param[in] j_col - the j-th column in an upper-triangle matrix. +/// @param[in] dim - the dimension of the upper-triangle matrix, i.e., a dim x dim matrix. +/// @return the corresponding index in an array storage. +/// +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(unsigned int i_row, unsigned int j_col, unsigned int dim) { + return (dim + dim - i_row + 1) * i_row / 2 + j_col - i_row; +} + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle or lower-triangle matrix. +/// +/// @param[in] i_row - the i-th row in a triangle matrix. +/// @param[in] j_col - the j-th column in a triangle matrix. +/// @param[in] n_var - the dimension of the triangle matrix, i.e., an n_var x n_var matrix.
+/// @return the corresponding index in an array storage. +/// +static inline unsigned int idx_of_2trimat(unsigned int i_row, unsigned int j_col, unsigned int n_var) { + if (i_row > j_col) { + return PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(j_col, i_row, n_var); + } + return PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i_row, j_col, n_var); +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_UpperTrianglize(unsigned char *btriC, const unsigned char *bA, unsigned int Awidth, unsigned int size_batch) { + unsigned char *runningC = btriC; + unsigned int Aheight = Awidth; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < i; j++) { + unsigned int idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(j, i, Aheight); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(btriC + idx * size_batch, bA + size_batch * (i * Awidth + j), size_batch); + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(runningC, bA + size_batch * (i * Awidth + i), size_batch * (Aheight - i)); + runningC += size_batch * (Aheight - i); + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Awidth = Bheight; + unsigned int Aheight = Awidth; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (k < i) { + continue; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(bC, &btriA[(k - i) * size_batch], PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + btriA += (Aheight - i) * size_batch; + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimatTr_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Aheight = Bheight; + 
for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (i < k) { + continue; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(bC, &btriA[size_batch * (PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(k, i, Aheight))], PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_2trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Aheight = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + if (i == k) { + continue; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(bC, &btriA[size_batch * (idx_of_2trimat(i, k, Aheight))], PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_matTr_madd_gf256(unsigned char *bC, const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Atr_height = Awidth; + unsigned int Atr_width = Aheight; + for (unsigned int i = 0; i < Atr_height; i++) { + for (unsigned int j = 0; j < Atr_width; j++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(bC, &bB[j * Bwidth * size_batch], PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(&A_to_tr[size_Acolvec * i], j), size_batch * Bwidth); + } + bC += size_batch * Bwidth; + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_bmatTr_madd_gf256(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + const 
unsigned char *bA = bA_to_tr; + unsigned int Aheight = Awidth_before_tr; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(bC, &bA[size_batch * (i + k * Aheight)], PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_mat_madd_gf256(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch) { + unsigned int Awidth = Bheight; + for (unsigned int i = 0; i < Aheight; i++) { + for (unsigned int j = 0; j < Bwidth; j++) { + for (unsigned int k = 0; k < Bheight; k++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(bC, &bA[k * size_batch], PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(&B[j * size_Bcolvec], k), size_batch); + } + bC += size_batch; + } + bA += (Awidth) * size_batch; + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_trimat_eval_gf256(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch) { + unsigned char tmp[256]; + + unsigned char _x[256]; + for (unsigned int i = 0; i < dim; i++) { + _x[i] = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(x, i); + } + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(y, size_batch); + for (unsigned int i = 0; i < dim; i++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(tmp, size_batch); + for (unsigned int j = i; j < dim; j++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(tmp, trimat, _x[j], size_batch); + trimat += size_batch; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(y, tmp, _x[i], size_batch); + } +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_recmat_eval_gf256(unsigned char *z, const unsigned char *y, unsigned int dim_y, const unsigned char *mat, + const unsigned char *x, unsigned 
dim_x, unsigned size_batch) { + unsigned char tmp[128]; + + unsigned char _x[128]; + for (unsigned int i = 0; i < dim_x; i++) { + _x[i] = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(x, i); + } + unsigned char _y[128]; + for (unsigned int i = 0; i < dim_y; i++) { + _y[i] = PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele(y, i); + } + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(z, size_batch); + for (unsigned int i = 0; i < dim_y; i++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(tmp, size_batch); + for (unsigned int j = 0; j < dim_x; j++) { + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(tmp, mat, _x[j], size_batch); + mat += size_batch; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd(z, tmp, _y[i], size_batch); + } +} + diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/parallel_matrix_op.h b/crypto_sign/rainbow/rainbowV-classic/clean/parallel_matrix_op.h new file mode 100644 index 00000000..b85c603b --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/parallel_matrix_op.h @@ -0,0 +1,260 @@ +#ifndef _P_MATRIX_OP_H_ +#define _P_MATRIX_OP_H_ +/// @file parallel_matrix_op.h +/// @brief Librarys for operations of batched matrixes. +/// +/// + +//////////////// Section: triangle matrix <-> rectangle matrix /////////////////////////////////// + +/// +/// @brief Calculate the corresponding index in an array for an upper-triangle(UT) matrix. +/// +/// @param[in] i_row - the i-th row in an upper-triangle matrix. +/// @param[in] j_col - the j-th column in an upper-triangle matrix. +/// @param[in] dim - the dimension of the upper-triangle matrix, i.e., an dim x dim matrix. +/// @return the corresponding index in an array storage. +/// +unsigned int PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(unsigned int i_row, unsigned int j_col, unsigned int dim); + +/// +/// @brief Upper trianglize a rectangle matrix to the corresponding upper-trangle matrix. +/// +/// @param[out] btriC - the batched upper-trianglized matrix C. 
+/// @param[in] bA - a batched rectangle matrix A. +/// @param[in] Awidth - the width of the batched matrix A, i.e., A is an Awidth x Awidth matrix. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_UpperTrianglize(unsigned char *btriC, const unsigned char *bA, unsigned int Awidth, unsigned int size_batch); + +//////////////////// Section: matrix multiplications /////////////////////////////// + +/// +/// @brief bC += btriA * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA^Tr * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A.
A will be transposed while multiplying. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimatTr_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += btriA^Tr * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A, which will be transposed while multiplying. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimatTr_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += (btriA + btriA^Tr) *B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. The operand for multiplication is (btriA + btriA^Tr). +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_2trimat_madd_gf16(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += (btriA + btriA^Tr) *B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] btriA - a batched UT matrix A. The operand for multiplication is (btriA + btriA^Tr). +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_2trimat_madd_gf256(unsigned char *bC, const unsigned char *btriA, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += A^Tr * bB , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] A_to_tr - a column-major matrix A. The operand for multiplication is A^Tr. +/// @param[in] Aheight - the height of A. +/// @param[in] size_Acolvec - the size of a column vector in A. +/// @param[in] Awidth - the width of A. +/// @param[in] bB - a batched matrix B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_matTr_madd_gf16(unsigned char *bC, + const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += A^Tr * bB , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] A_to_tr - a column-major matrix A. The operand for multiplication is A^Tr. +/// @param[in] Aheight - the height of A. 
+/// @param[in] size_Acolvec - the size of a column vector in A. +/// @param[in] Awidth - the width of A. +/// @param[in] bB - a batched matrix B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_matTr_madd_gf256(unsigned char *bC, + const unsigned char *A_to_tr, unsigned int Aheight, unsigned int size_Acolvec, unsigned int Awidth, + const unsigned char *bB, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA^Tr * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA_to_tr - a batched matrix A. The operand for multiplication is (bA^Tr). +/// @param[in] Awidth_before_tr - the width of A before it is transposed. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_bmatTr_madd_gf16(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA^Tr * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA_to_tr - a batched matrix A. The operand for multiplication is (bA^Tr). +/// @param[in] Awidth_before_tr - the width of A before it is transposed. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix.
+/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_bmatTr_madd_gf256(unsigned char *bC, const unsigned char *bA_to_tr, unsigned int Awidth_before_tr, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA * B , in GF(16) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA - a batched matrix A. +/// @param[in] Aheight - the height of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_mat_madd_gf16(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +/// +/// @brief bC += bA * B , in GF(256) +/// +/// @param[out] bC - the batched matrix C. +/// @param[in] bA - a batched matrix A. +/// @param[in] Aheight - the height of A. +/// @param[in] B - a column-major matrix B. +/// @param[in] Bheight - the height of B. +/// @param[in] size_Bcolvec - the size of the column vector in B. +/// @param[in] Bwidth - the width of B. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_mat_madd_gf256(unsigned char *bC, const unsigned char *bA, unsigned int Aheight, + const unsigned char *B, unsigned int Bheight, unsigned int size_Bcolvec, unsigned int Bwidth, unsigned int size_batch); + +//////////////////// Section: "quadratic" matrix evaluation /////////////////////////////// + +/// +/// @brief y = x^Tr * trimat * x , in GF(16) +/// +/// @param[out] y - the returned batched element y. +/// @param[in] trimat - a batched matrix.
+/// @param[in] x - an input vector x. +/// @param[in] dim - the dimension of matrix trimat (and x). +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_trimat_eval_gf16(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch); + +/// +/// @brief y = x^Tr * trimat * x , in GF(256) +/// +/// @param[out] y - the returned batched element y. +/// @param[in] trimat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim - the dimension of matrix trimat (and x). +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_trimat_eval_gf256(unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned int dim, unsigned int size_batch); + +/// +/// @brief z = y^Tr * mat * x , in GF(16) +/// +/// @param[out] z - the returned batched element z. +/// @param[in] y - an input vector y. +/// @param[in] dim_y - the length of y. +/// @param[in] mat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim_x - the length of x. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_recmat_eval_gf16(unsigned char *z, const unsigned char *y, unsigned int dim_y, + const unsigned char *mat, const unsigned char *x, unsigned int dim_x, unsigned int size_batch); + +/// +/// @brief z = y^Tr * mat * x , in GF(256) +/// +/// @param[out] z - the returned batched element z. +/// @param[in] y - an input vector y. +/// @param[in] dim_y - the length of y. +/// @param[in] mat - a batched matrix. +/// @param[in] x - an input vector x. +/// @param[in] dim_x - the length of x. +/// @param[in] size_batch - number of the batched elements in the corresponding position of the matrix. 
+/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_recmat_eval_gf256(unsigned char *z, const unsigned char *y, unsigned int dim_y, + const unsigned char *mat, const unsigned char *x, unsigned int dim_x, unsigned int size_batch); + +#endif // _P_MATRIX_OP_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow.c b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow.c new file mode 100644 index 00000000..85452232 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow.c @@ -0,0 +1,168 @@ +/// @file rainbow.c +/// @brief The standard implementations for functions in rainbow.h +/// + +#include "blas.h" +#include "parallel_matrix_op.h" +#include "rainbow.h" +#include "rainbow_blas.h" +#include "rainbow_config.h" +#include "rainbow_keypair.h" +#include "utils_hash.h" +#include "utils_prng.h" +#include +#include +#include + +#define MAX_ATTEMPT_FRMAT 128 + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_sign(uint8_t *signature, const sk_t *sk, const uint8_t *_digest) { + uint8_t mat_l1[_O1 * _O1_BYTE]; + uint8_t mat_l2[_O2 * _O2_BYTE]; + uint8_t mat_buffer[2 * _MAX_O * _MAX_O_BYTE]; + + // setup PRNG + prng_t prng_sign; + uint8_t prng_preseed[LEN_SKSEED + _HASH_LEN]; + memcpy(prng_preseed, sk->sk_seed, LEN_SKSEED); + memcpy(prng_preseed + LEN_SKSEED, _digest, _HASH_LEN); // prng_preseed = sk_seed || digest + uint8_t prng_seed[_HASH_LEN]; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(prng_seed, _HASH_LEN, prng_preseed, _HASH_LEN + LEN_SKSEED); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_set(&prng_sign, prng_seed, _HASH_LEN); // seed = H( sk_seed || digest ) + for (unsigned int i = 0; i < LEN_SKSEED + _HASH_LEN; i++) { + prng_preseed[i] ^= prng_preseed[i]; // clean + } + for (unsigned int i = 0; i < _HASH_LEN; i++) { + prng_seed[i] ^= prng_seed[i]; // clean + } + + // roll vinegars. 
+ uint8_t vinegar[_V1_BYTE]; + unsigned int n_attempt = 0; + unsigned int l1_succ = 0; + while (!l1_succ) { + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + break; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen(&prng_sign, vinegar, _V1_BYTE); // generating vinegars + gfmat_prod(mat_l1, sk->l1_F2, _O1 * _O1_BYTE, _V1, vinegar); // generating the linear equations for layer 1 + l1_succ = gfmat_inv(mat_l1, mat_l1, _O1, mat_buffer); // check if the linear equation solvable + n_attempt++; + } + + // Given the vinegars, pre-compute variables needed for layer 2 + uint8_t r_l1_F1[_O1_BYTE] = {0}; + uint8_t r_l2_F1[_O2_BYTE] = {0}; + batch_quad_trimat_eval(r_l1_F1, sk->l1_F1, vinegar, _V1, _O1_BYTE); + batch_quad_trimat_eval(r_l2_F1, sk->l2_F1, vinegar, _V1, _O2_BYTE); + uint8_t mat_l2_F3[_O2 * _O2_BYTE]; + uint8_t mat_l2_F2[_O1 * _O2_BYTE]; + gfmat_prod(mat_l2_F3, sk->l2_F3, _O2 * _O2_BYTE, _V1, vinegar); + gfmat_prod(mat_l2_F2, sk->l2_F2, _O1 * _O2_BYTE, _V1, vinegar); + + // Some local variables. + uint8_t _z[_PUB_M_BYTE]; + uint8_t y[_PUB_M_BYTE]; + uint8_t *x_v1 = vinegar; + uint8_t x_o1[_O1_BYTE]; + uint8_t x_o2[_O2_BYTE]; + + uint8_t digest_salt[_HASH_LEN + _SALT_BYTE]; + memcpy(digest_salt, _digest, _HASH_LEN); + uint8_t *salt = digest_salt + _HASH_LEN; + + uint8_t temp_o[_MAX_O_BYTE + 32] = {0}; + unsigned int succ = 0; + while (!succ) { + if (MAX_ATTEMPT_FRMAT <= n_attempt) { + break; + } + // The computation: H(digest||salt) --> z --S--> y --C-map--> x --T--> w + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen(&prng_sign, salt, _SALT_BYTE); // roll the salt + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(_z, _PUB_M_BYTE, digest_salt, _HASH_LEN + _SALT_BYTE); // H(digest||salt) + + // y = S^-1 * z + memcpy(y, _z, _PUB_M_BYTE); // identity part of S + gfmat_prod(temp_o, sk->s1, _O1_BYTE, _O2, _z + _O1_BYTE); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(y, temp_o, _O1_BYTE); + + // Central Map: + // layer 1: calculate x_o1 + memcpy(temp_o, r_l1_F1, _O1_BYTE); + 
PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(temp_o, y, _O1_BYTE); + gfmat_prod(x_o1, mat_l1, _O1_BYTE, _O1, temp_o); + + // layer 2: calculate x_o2 + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_set_zero(temp_o, _O2_BYTE); + gfmat_prod(temp_o, mat_l2_F2, _O2_BYTE, _O1, x_o1); // F2 + batch_quad_trimat_eval(mat_l2, sk->l2_F5, x_o1, _O1, _O2_BYTE); // F5 + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(temp_o, mat_l2, _O2_BYTE); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(temp_o, r_l2_F1, _O2_BYTE); // F1 + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(temp_o, y + _O1_BYTE, _O2_BYTE); + + // generate the linear equations of the 2nd layer + gfmat_prod(mat_l2, sk->l2_F6, _O2 * _O2_BYTE, _O1, x_o1); // F6 + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(mat_l2, mat_l2_F3, _O2 * _O2_BYTE); // F3 + succ = gfmat_inv(mat_l2, mat_l2, _O2, mat_buffer); + gfmat_prod(x_o2, mat_l2, _O2_BYTE, _O2, temp_o); // solve l2 eqs + + n_attempt++; + }; + // w = T^-1 * y + uint8_t w[_PUB_N_BYTE]; + // identity part of T. + memcpy(w, x_v1, _V1_BYTE); + memcpy(w + _V1_BYTE, x_o1, _O1_BYTE); + memcpy(w + _V2_BYTE, x_o2, _O2_BYTE); + // Computing the t1 part. + gfmat_prod(y, sk->t1, _V1_BYTE, _O1, x_o1); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(w, y, _V1_BYTE); + // Computing the t4 part. + gfmat_prod(y, sk->t4, _V1_BYTE, _O2, x_o2); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(w, y, _V1_BYTE); + // Computing the t3 part. + gfmat_prod(y, sk->t3, _O1_BYTE, _O2, x_o2); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(w + _V1_BYTE, y, _O1_BYTE); + + memset(signature, 0, _SIGNATURE_BYTE); // set the output 0 + // clean + memset(&prng_sign, 0, sizeof(prng_t)); + memset(vinegar, 0, _V1_BYTE); + memset(r_l1_F1, 0, _O1_BYTE); + memset(r_l2_F1, 0, _O2_BYTE); + memset(_z, 0, _PUB_M_BYTE); + memset(y, 0, _PUB_M_BYTE); + memset(x_o1, 0, _O1_BYTE); + memset(x_o2, 0, _O2_BYTE); + memset(temp_o, 0, sizeof(temp_o)); + + // return: copy w and salt to the signature. 
+ if (MAX_ATTEMPT_FRMAT <= n_attempt) { + return -1; + } + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(signature, w, _PUB_N_BYTE); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(signature + _PUB_N_BYTE, salt, _SALT_BYTE); + return 0; +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_verify(const uint8_t *digest, const uint8_t *signature, const pk_t *pk) { + unsigned char digest_ck[_PUB_M_BYTE]; + // public_map( digest_ck , pk , signature ); Evaluating the quadratic public polynomials. + batch_quad_trimat_eval(digest_ck, pk->pk, signature, _PUB_N, _PUB_M_BYTE); + + unsigned char correct[_PUB_M_BYTE]; + unsigned char digest_salt[_HASH_LEN + _SALT_BYTE]; + memcpy(digest_salt, digest, _HASH_LEN); + memcpy(digest_salt + _HASH_LEN, signature + _PUB_N_BYTE, _SALT_BYTE); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(correct, _PUB_M_BYTE, digest_salt, _HASH_LEN + _SALT_BYTE); // H( digest || salt ) + + // check consistancy. + unsigned char cc = 0; + for (unsigned int i = 0; i < _PUB_M_BYTE; i++) { + cc |= (digest_ck[i] ^ correct[i]); + } + return (0 == cc) ? 0 : -1; +} + + diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow.h b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow.h new file mode 100644 index 00000000..1b8e379f --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow.h @@ -0,0 +1,33 @@ +#ifndef _RAINBOW_H_ +#define _RAINBOW_H_ +/// @file rainbow.h +/// @brief APIs for rainbow. +/// + +#include "rainbow_config.h" +#include "rainbow_keypair.h" + +#include + +/// +/// @brief Signing function for classical secret key. +/// +/// @param[out] signature - the signature. +/// @param[in] sk - the secret key. +/// @param[in] digest - the digest. +/// +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_sign(uint8_t *signature, const sk_t *sk, const uint8_t *digest); + +/// +/// @brief Verifying function. +/// +/// @param[in] digest - the digest. +/// @param[in] signature - the signature. +/// @param[in] pk - the public key. +/// @return 0 for successful verified. 
-1 for failed verification. +/// +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_verify(const uint8_t *digest, const uint8_t *signature, const pk_t *pk); + + + +#endif // _RAINBOW_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_blas.h b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_blas.h new file mode 100644 index 00000000..e0114ab7 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_blas.h @@ -0,0 +1,32 @@ +#ifndef _RAINBOW_BLAS_H_ +#define _RAINBOW_BLAS_H_ +/// @file rainbow_blas.h +/// @brief Defining the functions used in rainbow.c acconding to the definitions in rainbow_config.h +/// +/// Defining the functions used in rainbow.c acconding to the definitions in rainbow_config.h + +#include "blas.h" +#include "blas_comm.h" +#include "parallel_matrix_op.h" +#include "rainbow_config.h" + + +#define gfv_get_ele PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_get_ele +#define gfv_mul_scalar PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_mul_scalar +#define gfv_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_madd + +#define gfmat_prod PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_prod +#define gfmat_inv PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256mat_inv + +#define batch_trimat_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimat_madd_gf256 +#define batch_trimatTr_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_trimatTr_madd_gf256 +#define batch_2trimat_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_2trimat_madd_gf256 +#define batch_matTr_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_matTr_madd_gf256 +#define batch_bmatTr_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_bmatTr_madd_gf256 +#define batch_mat_madd PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_mat_madd_gf256 + +#define batch_quad_trimat_eval PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_trimat_eval_gf256 +#define batch_quad_recmat_eval PQCLEAN_RAINBOWVCLASSIC_CLEAN_batch_quad_recmat_eval_gf256 + + +#endif // _RAINBOW_BLAS_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_config.h 
b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_config.h new file mode 100644 index 00000000..e668ff3c --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_config.h @@ -0,0 +1,48 @@ +#ifndef _H_RAINBOW_CONFIG_H_ +#define _H_RAINBOW_CONFIG_H_ + +/// @file rainbow_config.h +/// @brief Defining the parameters of the Rainbow and the corresponding constants. +/// + +#define _GFSIZE 256 +#define _V1 96 +#define _O1 36 +#define _O2 64 +#define _MAX_O 64 +#define _HASH_LEN 64 + + +#define _V2 ((_V1) + (_O1)) + +/// size of N, in # of gf elements. +#define _PUB_N (_V1 + _O1 + _O2) + +/// size of M, in # gf elements. +#define _PUB_M (_O1 + _O2) + +/// size of variables, in # bytes. + +// GF256 +#define _V1_BYTE (_V1) +#define _V2_BYTE (_V2) +#define _O1_BYTE (_O1) +#define _O2_BYTE (_O2) +#define _MAX_O_BYTE (_MAX_O) +#define _PUB_N_BYTE (_PUB_N) +#define _PUB_M_BYTE (_PUB_M) + + +/// length of seed for public key, in # bytes +#define LEN_PKSEED 32 + +/// length of seed for secret key, in # bytes +#define LEN_SKSEED 32 + +/// length of salt for a signature, in # bytes +#define _SALT_BYTE 16 + +/// length of a signature +#define _SIGNATURE_BYTE (_PUB_N_BYTE + _SALT_BYTE) + +#endif // _H_RAINBOW_CONFIG_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair.c b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair.c new file mode 100644 index 00000000..7dff94f1 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair.c @@ -0,0 +1,155 @@ +/// @file rainbow_keypair.c +/// @brief implementations of functions in rainbow_keypair.h +/// + +#include "rainbow_keypair.h" +#include "blas.h" +#include "blas_comm.h" +#include "rainbow_blas.h" +#include "rainbow_keypair_computation.h" +#include "utils_prng.h" +#include +#include +#include + +static +void generate_S_T( unsigned char *s_and_t, prng_t *prng0 ) { + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->s1); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, 
s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t1); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t4); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); + s_and_t += size; + + size = sizeof(_sk->t3); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, s_and_t, size ); +} + + +static +unsigned generate_l1_F12( unsigned char *sk, prng_t *prng0 ) { + unsigned n_byte_generated = 0; + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->l1_F1); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l1_F2); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + n_byte_generated += size; + + return n_byte_generated; +} + + +static +unsigned generate_l2_F12356( unsigned char *sk, prng_t *prng0 ) { + unsigned n_byte_generated = 0; + sk_t *_sk; + unsigned size; + + size = sizeof(_sk->l2_F1); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F2); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F3); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F5); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + sk += size; + n_byte_generated += size; + + size = sizeof(_sk->l2_F6); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen( prng0, sk, size ); + n_byte_generated += size; + + return n_byte_generated; +} + + +static void generate_B1_B2(unsigned char *sk, prng_t *prng0) { + sk += generate_l1_F12(sk, prng0); + generate_l2_F12356(sk, prng0); +} + +static void calculate_t4(unsigned char *t2_to_t4, const unsigned char *t1, const unsigned char *t3) { + // t4 = T_sk.t1 * T_sk.t3 - T_sk.t2 + unsigned char temp[_V1_BYTE + 32]; + unsigned char *t4 = t2_to_t4; + for (unsigned int i = 0; i < 
_O2; i++) { /// t3 width + gfmat_prod(temp, t1, _V1_BYTE, _O1, t3); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(t4, temp, _V1_BYTE); + t4 += _V1_BYTE; + t3 += _O1_BYTE; + } +} + +static void obsfucate_l1_polys(unsigned char *l1_polys, const unsigned char *l2_polys, unsigned int n_terms, const unsigned char *s1) { + unsigned char temp[_O1_BYTE + 32]; + while (n_terms--) { + gfmat_prod(temp, s1, _O1_BYTE, _O2, l2_polys); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_gf256v_add(l1_polys, temp, _O1_BYTE); + l1_polys += _O1_BYTE; + l2_polys += _O2_BYTE; + } +} + +/////////////////// Classic ////////////////////////////////// + +static void generate_secretkey(sk_t *sk, const unsigned char *sk_seed) { + memcpy(sk->sk_seed, sk_seed, LEN_SKSEED); + + // set up prng + prng_t prng0; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_set(&prng0, sk_seed, LEN_SKSEED); + + // generating secret key with prng. + generate_S_T(sk->s1, &prng0); + generate_B1_B2(sk->l1_F1, &prng0); + + // clean prng + memset(&prng0, 0, sizeof(prng_t)); +} + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_generate_keypair(pk_t *rpk, sk_t *sk, const unsigned char *sk_seed) { + generate_secretkey(sk, sk_seed); + + // set up a temporary structure ext_cpk_t for calculating public key. + ext_cpk_t pk; + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_calculate_Q_from_F(&pk, sk, sk); // compute the public key in ext_cpk_t format. + calculate_t4(sk->t4, sk->t1, sk->t3); + + obsfucate_l1_polys(pk.l1_Q1, pk.l2_Q1, N_TRIANGLE_TERMS(_V1), sk->s1); + obsfucate_l1_polys(pk.l1_Q2, pk.l2_Q2, _V1 * _O1, sk->s1); + obsfucate_l1_polys(pk.l1_Q3, pk.l2_Q3, _V1 * _O2, sk->s1); + obsfucate_l1_polys(pk.l1_Q5, pk.l2_Q5, N_TRIANGLE_TERMS(_O1), sk->s1); + obsfucate_l1_polys(pk.l1_Q6, pk.l2_Q6, _O1 * _O2, sk->s1); + obsfucate_l1_polys(pk.l1_Q9, pk.l2_Q9, N_TRIANGLE_TERMS(_O2), sk->s1); + // so far, the pk contains the full pk but in ext_cpk_t format. + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_extcpk_to_pk(rpk, &pk); // convert the public key from ext_cpk_t to pk_t. 
+} + + + diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair.h b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair.h new file mode 100644 index 00000000..d7994033 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair.h @@ -0,0 +1,61 @@ +#ifndef _RAINBOW_KEYPAIR_H_ +#define _RAINBOW_KEYPAIR_H_ +/// @file rainbow_keypair.h +/// @brief Formats of key pairs and functions for generating key pairs. +/// Formats of key pairs and functions for generating key pairs. +/// + +#include "rainbow_config.h" + +#define N_TRIANGLE_TERMS(n_var) ((n_var) * ((n_var) + 1) / 2) + +/// @brief public key for classic rainbow +/// +/// public key for classic rainbow +/// +typedef struct rainbow_publickey { + unsigned char pk[(_PUB_M_BYTE)*N_TRIANGLE_TERMS(_PUB_N)]; +} pk_t; + +/// @brief secret key for classic rainbow +/// +/// secret key for classic rainbow +/// +typedef struct rainbow_secretkey { + /// + /// seed for generating secret key. + /// Generating S, T, and F for classic rainbow. + /// Generating S and T only for cyclic rainbow. + unsigned char sk_seed[LEN_SKSEED]; + + unsigned char s1[_O1_BYTE * _O2]; ///< part of S map + unsigned char t1[_V1_BYTE * _O1]; ///< part of T map + unsigned char t4[_V1_BYTE * _O2]; ///< part of T map + unsigned char t3[_O1_BYTE * _O2]; ///< part of T map + + unsigned char l1_F1[_O1_BYTE * N_TRIANGLE_TERMS(_V1)]; ///< part of C-map, F1, Layer1 + unsigned char l1_F2[_O1_BYTE * _V1 * _O1]; ///< part of C-map, F2, Layer1 + + unsigned char l2_F1[_O2_BYTE * N_TRIANGLE_TERMS(_V1)]; ///< part of C-map, F1, Layer2 + unsigned char l2_F2[_O2_BYTE * _V1 * _O1]; ///< part of C-map, F2, Layer2 + + unsigned char l2_F3[_O2_BYTE * _V1 * _O2]; ///< part of C-map, F3, Layer2 + unsigned char l2_F5[_O2_BYTE * N_TRIANGLE_TERMS(_O1)]; ///< part of C-map, F5, Layer2 + unsigned char l2_F6[_O2_BYTE * _O1 * _O2]; ///< part of C-map, F6, Layer2 +} sk_t; + + +/// +/// @brief Generate key pairs for classic rainbow. 
+/// +/// @param[out] pk - the public key. +/// @param[out] sk - the secret key. +/// @param[in] sk_seed - seed for generating the secret key. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_generate_keypair(pk_t *pk, sk_t *sk, const unsigned char *sk_seed); + + + + + +#endif // _RAINBOW_KEYPAIR_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair_computation.c b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair_computation.c new file mode 100644 index 00000000..a2848619 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair_computation.c @@ -0,0 +1,189 @@ +/// @file rainbow_keypair_computation.c +/// @brief Implementations for functions in rainbow_keypair_computation.h +/// + +#include "rainbow_keypair_computation.h" +#include "blas.h" +#include "blas_comm.h" +#include "rainbow_blas.h" +#include "rainbow_keypair.h" +#include +#include +#include + +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk) { + const unsigned char *idx_l1 = cpk->l1_Q1; + const unsigned char *idx_l2 = cpk->l2_Q1; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = i; j < _V1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q2; + idx_l2 = cpk->l2_Q2; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = _V1; j < _V1 + _O1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q3; + idx_l2 = cpk->l2_Q3; + for (unsigned int i = 0; i < _V1; i++) { + for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) { + unsigned 
int pub_idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q5; + idx_l2 = cpk->l2_Q5; + for (unsigned int i = _V1; i < _V1 + _O1; i++) { + for (unsigned int j = i; j < _V1 + _O1; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q6; + idx_l2 = cpk->l2_Q6; + for (unsigned int i = _V1; i < _V1 + _O1; i++) { + for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } + idx_l1 = cpk->l1_Q9; + idx_l2 = cpk->l2_Q9; + for (unsigned int i = _V1 + _O1; i < _PUB_N; i++) { + for (unsigned int j = i; j < _PUB_N; j++) { + unsigned int pub_idx = PQCLEAN_RAINBOWVCLASSIC_CLEAN_idx_of_trimat(i, j, _PUB_N); + memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE); + memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE); + idx_l1 += _O1_BYTE; + idx_l2 += _O2_BYTE; + } + } +} + +static void calculate_Q_from_F_ref(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) { + /* + Layer 1 + Computing : + Q_pk.l1_F1s[i] = F_sk.l1_F1s[i] + + Q_pk.l1_F2s[i] = (F1* T1 + F2) + F1tr * t1 + Q_pk.l1_F5s[i] = UT( T1tr* (F1 * T1 + F2) ) + */ + const unsigned char *t2 = Ts->t4; + + memcpy(Qs->l1_Q1, Fs->l1_F1, _O1_BYTE * N_TRIANGLE_TERMS(_V1)); + + memcpy(Qs->l1_Q2, Fs->l1_F2, _O1_BYTE * _V1 * _O1); + batch_trimat_madd(Qs->l1_Q2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); 
// F1*T1 + F2 + + memset(Qs->l1_Q3, 0, _O1_BYTE * _V1 * _O2); + memset(Qs->l1_Q5, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O1)); + memset(Qs->l1_Q6, 0, _O1_BYTE * _O1 * _O2); + memset(Qs->l1_Q9, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O2)); + + // l1_Q5 : _O1_BYTE * _O1 * _O1 + // l1_Q9 : _O1_BYTE * _O2 * _O2 + // l2_Q5 : _O2_BYTE * _V1 * _O1 + // l2_Q9 : _O2_BYTE * _V1 * _O2 + + unsigned char tempQ[_MAX_O_BYTE * _MAX_O * _MAX_O + 32]; + + memset(tempQ, 0, _O1_BYTE * _O1 * _O1); // l1_Q5 + batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q2, _O1, _O1_BYTE); // t1_tr*(F1*T1 + F2) + PQCLEAN_RAINBOWVCLASSIC_CLEAN_UpperTrianglize(Qs->l1_Q5, tempQ, _O1, _O1_BYTE); // UT( ... ) // Q5 + + batch_trimatTr_madd(Qs->l1_Q2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); // Q2 + /* + Computing: + F1_T2 = F1 * t2 + F2_T3 = F2 * t3 + F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2 + Q_pk.l1_F3s[i] = F1_F1T_T2 + F2_T3 + Q_pk.l1_F6s[i] = T1tr* ( F1_F1T_T2 + F2_T3 ) + F2tr * t2 + Q_pk.l1_F9s[i] = UT( T2tr* ( F1_T2 + F2_T3 ) ) + */ + batch_trimat_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1*T2 + batch_mat_madd(Qs->l1_Q3, Fs->l1_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O1_BYTE); // F1_T2 + F2_T3 + + memset(tempQ, 0, _O1_BYTE * _O2 * _O2); // l1_Q9 + batch_matTr_madd(tempQ, t2, _V1, _V1_BYTE, _O2, Qs->l1_Q3, _O2, _O1_BYTE); // T2tr * ( F1_T2 + F2_T3 ) + PQCLEAN_RAINBOWVCLASSIC_CLEAN_UpperTrianglize(Qs->l1_Q9, tempQ, _O2, _O1_BYTE); // Q9 + + batch_trimatTr_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1_F1T_T2 + F2_T3 // Q3 + + batch_bmatTr_madd(Qs->l1_Q6, Fs->l1_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F2tr*T2 + batch_matTr_madd(Qs->l1_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q3, _O2, _O1_BYTE); // Q6 + + /* + layer 2 + Computing: + Q1 = F1 + Q2 = F1_F1T*T1 + F2 + Q5 = UT( T1tr( F1*T1 + F2 ) + F5 ) + */ + memcpy(Qs->l2_Q1, Fs->l2_F1, _O2_BYTE * N_TRIANGLE_TERMS(_V1)); + + memcpy(Qs->l2_Q2, Fs->l2_F2, _O2_BYTE * _V1 * _O1); + 
batch_trimat_madd(Qs->l2_Q2, Fs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // F1*T1 + F2 + + memcpy(Qs->l2_Q5, Fs->l2_F5, _O2_BYTE * N_TRIANGLE_TERMS(_O1)); + memset(tempQ, 0, _O2_BYTE * _O1 * _O1); // l2_Q5 + batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l2_Q2, _O1, _O2_BYTE); // t1_tr*(F1*T1 + F2) + PQCLEAN_RAINBOWVCLASSIC_CLEAN_UpperTrianglize(Qs->l2_Q5, tempQ, _O1, _O2_BYTE); // UT( ... ) // Q5 + + batch_trimatTr_madd(Qs->l2_Q2, Fs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // Q2 + + /* + Computing: + F1_T2 = F1 * t2 + F2_T3 = F2 * t3 + F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2 + + Q3 = F1_F1T*T2 + F2*T3 + F3 + Q9 = UT( T2tr*( F1*T2 + F2*T3 + F3 ) + T3tr*( F5*T3 + F6 ) ) + Q6 = T1tr*( F1_F1T*T2 + F2*T3 + F3 ) + F2Tr*T2 + F5_F5T*T3 + F6 + */ + memcpy(Qs->l2_Q3, Fs->l2_F3, _O2_BYTE * _V1 * _O2); + batch_trimat_madd(Qs->l2_Q3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1*T2 + F3 + batch_mat_madd(Qs->l2_Q3, Fs->l2_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F1_T2 + F2_T3 + F3 + + memset(tempQ, 0, _O2_BYTE * _O2 * _O2); // l2_Q9 + batch_matTr_madd(tempQ, t2, _V1, _V1_BYTE, _O2, Qs->l2_Q3, _O2, _O2_BYTE); // T2tr * ( ..... ) + + memcpy(Qs->l2_Q6, Fs->l2_F6, _O2_BYTE * _O1 * _O2); + + batch_trimat_madd(Qs->l2_Q6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6 + batch_matTr_madd(tempQ, Ts->t3, _O1, _O1_BYTE, _O2, Qs->l2_Q6, _O2, _O2_BYTE); // T2tr*( ..... ) + T3tr*( ..... 
) + memset(Qs->l2_Q9, 0, _O2_BYTE * N_TRIANGLE_TERMS(_O2)); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_UpperTrianglize(Qs->l2_Q9, tempQ, _O2, _O2_BYTE); // Q9 + + batch_trimatTr_madd(Qs->l2_Q3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1_F1T_T2 + F2_T3 + F3 // Q3 + + batch_bmatTr_madd(Qs->l2_Q6, Fs->l2_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6 + F2tr*T2 + batch_trimatTr_madd(Qs->l2_Q6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F2tr*T2 + F5_F5T*T3 + F6 + batch_matTr_madd(Qs->l2_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l2_Q3, _O2, _O2_BYTE); // Q6 +} +#define calculate_Q_from_F_impl calculate_Q_from_F_ref +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_calculate_Q_from_F(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) { + calculate_Q_from_F_impl(Qs, Fs, Ts); +} diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair_computation.h b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair_computation.h new file mode 100644 index 00000000..d37923e3 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/rainbow_keypair_computation.h @@ -0,0 +1,53 @@ +#ifndef _RAINBOW_KEYPAIR_COMP_H_ +#define _RAINBOW_KEYPAIR_COMP_H_ +/// @file rainbow_keypair_computation.h +/// @brief Functions for calculating pk/sk while generating keys. +/// +/// Defining an internal structure of public key. +/// Functions for calculating pk/sk for key generation. +/// + +#include "rainbow_keypair.h" + +/// @brief The (internal use) public key for rainbow +/// +/// The (internal use) public key for rainbow. The public +/// polynomials are divided into l1_Q1, l1_Q2, ... l1_Q9, +/// l2_Q1, .... , l2_Q9. 
+/// +typedef struct rainbow_extend_publickey { + unsigned char l1_Q1[_O1_BYTE * N_TRIANGLE_TERMS(_V1)]; + unsigned char l1_Q2[_O1_BYTE * _V1 * _O1]; + unsigned char l1_Q3[_O1_BYTE * _V1 * _O2]; + unsigned char l1_Q5[_O1_BYTE * N_TRIANGLE_TERMS(_O1)]; + unsigned char l1_Q6[_O1_BYTE * _O1 * _O2]; + unsigned char l1_Q9[_O1_BYTE * N_TRIANGLE_TERMS(_O2)]; + + unsigned char l2_Q1[_O2_BYTE * N_TRIANGLE_TERMS(_V1)]; + unsigned char l2_Q2[_O2_BYTE * _V1 * _O1]; + unsigned char l2_Q3[_O2_BYTE * _V1 * _O2]; + unsigned char l2_Q5[_O2_BYTE * N_TRIANGLE_TERMS(_O1)]; + unsigned char l2_Q6[_O2_BYTE * _O1 * _O2]; + unsigned char l2_Q9[_O2_BYTE * N_TRIANGLE_TERMS(_O2)]; +} ext_cpk_t; + +/// +/// @brief converting formats of public keys : from ext_cpk_t version to pk_t +/// +/// @param[out] pk - the classic public key. +/// @param[in] cpk - the internel public key. +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk); +///////////////////////////////////////////////// + +/// +/// @brief Computing public key from secret key +/// +/// @param[out] Qs - the public key +/// @param[in] Fs - parts of the secret key: l1_F1, l1_F2, l2_F1, l2_F2, l2_F3, l2_F5, l2_F6 +/// @param[in] Ts - parts of the secret key: T1, T4, T3 +/// +void PQCLEAN_RAINBOWVCLASSIC_CLEAN_calculate_Q_from_F(ext_cpk_t *Qs, const sk_t *Fs, const sk_t *Ts); + + +#endif // _RAINBOW_KEYPAIR_COMP_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/sign.c b/crypto_sign/rainbow/rainbowV-classic/clean/sign.c new file mode 100644 index 00000000..1cf228e9 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/sign.c @@ -0,0 +1,74 @@ +/// @file sign.c +/// @brief the implementations for functions in api.h +/// +/// + +#include "api.h" +#include "rainbow.h" +#include "rainbow_config.h" +#include "rainbow_keypair.h" +#include "randombytes.h" +#include "utils_hash.h" +#include +#include + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_keypair(unsigned char *pk, unsigned char *sk) { + 
unsigned char sk_seed[LEN_SKSEED] = {0}; + randombytes(sk_seed, LEN_SKSEED); + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_generate_keypair((pk_t *)pk, (sk_t *)sk, sk_seed); + return 0; +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign(unsigned char *sm, size_t *smlen, const unsigned char *m, size_t mlen, const unsigned char *sk) { + unsigned char digest[_HASH_LEN]; + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + + memcpy(sm, m, mlen); + smlen[0] = mlen + _SIGNATURE_BYTE; + + return PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_sign(sm + mlen, (const sk_t *)sk, digest); +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_open(unsigned char *m, size_t *mlen, const unsigned char *sm, size_t smlen, const unsigned char *pk) { + int rc; + if (_SIGNATURE_BYTE > smlen) { + rc = -1; + } else { + *mlen = smlen - _SIGNATURE_BYTE; + + unsigned char digest[_HASH_LEN]; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, sm, *mlen); + + rc = PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_verify(digest, sm + mlen[0], (const pk_t *)pk); + } + if (!rc) { + memmove(m, sm, smlen - _SIGNATURE_BYTE); + } else { // bad signature + *mlen = (size_t) -1; + memset(m, 0, smlen); + } + return rc; +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + unsigned char digest[_HASH_LEN]; + + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + *siglen = _SIGNATURE_BYTE; + return PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_sign(sig, (const sk_t *)sk, digest); +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + if (siglen != _SIGNATURE_BYTE) { + return -1; + } + unsigned char digest[_HASH_LEN]; + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(digest, _HASH_LEN, m, mlen); + return PQCLEAN_RAINBOWVCLASSIC_CLEAN_rainbow_verify(digest, sig, (const pk_t *)pk); +} diff --git 
a/crypto_sign/rainbow/rainbowV-classic/clean/utils_hash.c b/crypto_sign/rainbow/rainbowV-classic/clean/utils_hash.c new file mode 100644 index 00000000..ca045fbf --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/utils_hash.c @@ -0,0 +1,50 @@ +/// @file utils_hash.c +/// @brief the adapter for SHA2 families. +/// +/// + +#include "utils_hash.h" +#include "rainbow_config.h" +#include "sha2.h" + +static inline int h(unsigned char *digest, const unsigned char *m, size_t mlen) { + sha512(digest, m, mlen); + return 0; +} + +static inline int expand_hash(unsigned char *digest, size_t n_digest, const unsigned char *hash) { + if (_HASH_LEN >= n_digest) { + for (size_t i = 0; i < n_digest; i++) { + digest[i] = hash[i]; + } + return 0; + } + for (size_t i = 0; i < _HASH_LEN; i++) { + digest[i] = hash[i]; + } + n_digest -= _HASH_LEN; + + while (_HASH_LEN <= n_digest) { + h(digest + _HASH_LEN, digest, _HASH_LEN); + + n_digest -= _HASH_LEN; + digest += _HASH_LEN; + } + unsigned char temp[_HASH_LEN]; + if (n_digest) { + h(temp, digest, _HASH_LEN); + for (size_t i = 0; i < n_digest; i++) { + digest[_HASH_LEN + i] = temp[i]; + } + } + return 0; +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(unsigned char *digest, + size_t len_digest, + const unsigned char *m, + size_t mlen) { + unsigned char buf[_HASH_LEN]; + h(buf, m, mlen); + return expand_hash(digest, len_digest, buf); +} diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/utils_hash.h b/crypto_sign/rainbow/rainbowV-classic/clean/utils_hash.h new file mode 100644 index 00000000..9624ee69 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/utils_hash.h @@ -0,0 +1,11 @@ +#ifndef _UTILS_HASH_H_ +#define _UTILS_HASH_H_ +/// @file utils_hash.h +/// @brief the interface for adapting hash functions. 
+/// + +#include + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(unsigned char *digest, size_t len_digest, const unsigned char *m, size_t mlen); + +#endif // _UTILS_HASH_H_ diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/utils_prng.c b/crypto_sign/rainbow/rainbowV-classic/clean/utils_prng.c new file mode 100644 index 00000000..4e552b6a --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/utils_prng.c @@ -0,0 +1,97 @@ +/// @file utils_prng.c +/// @brief The implementation of PRNG related functions. +/// + +#include "utils_prng.h" +#include "aes.h" +#include "randombytes.h" +#include "utils_hash.h" +#include +#include + +static void prng_update(const unsigned char *provided_data, + unsigned char *Key, + unsigned char *V) { + unsigned char temp[48]; + aes256ctx ctx; + aes256_ecb_keyexp(&ctx, Key); + for (int i = 0; i < 3; i++) { + //increment V + for (int j = 15; j >= 0; j--) { + if (V[j] == 0xff) { + V[j] = 0x00; + } else { + V[j]++; + break; + } + } + aes256_ecb(temp + 16 * i, V, 1, &ctx); + } + if (provided_data != NULL) { + for (int i = 0; i < 48; i++) { + temp[i] ^= provided_data[i]; + } + } + aes256_ctx_release(&ctx); + memcpy(Key, temp, 32); + memcpy(V, temp + 32, 16); +} +static void randombytes_init_with_state(prng_t *state, + unsigned char *entropy_input_48bytes) { + memset(state->Key, 0x00, 32); + memset(state->V, 0x00, 16); + prng_update(entropy_input_48bytes, state->Key, state->V); +} + +static int randombytes_with_state(prng_t *state, + unsigned char *x, + size_t xlen) { + + unsigned char block[16]; + int i = 0; + + aes256ctx ctx; + aes256_ecb_keyexp(&ctx, state->Key); + + while (xlen > 0) { + //increment V + for (int j = 15; j >= 0; j--) { + if (state->V[j] == 0xff) { + state->V[j] = 0x00; + } else { + state->V[j]++; + break; + } + } + aes256_ecb(block, state->V, 1, &ctx); + if (xlen > 15) { + memcpy(x + i, block, 16); + i += 16; + xlen -= 16; + } else { + memcpy(x + i, block, xlen); + xlen = 0; + } + } + aes256_ctx_release(&ctx); + 
prng_update(NULL, state->Key, state->V); + return 0; +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_set(prng_t *ctx, const void *prng_seed, unsigned long prng_seedlen) { + unsigned char seed[48]; + if (prng_seedlen >= 48) { + memcpy(seed, prng_seed, 48); + } else { + memcpy(seed, prng_seed, prng_seedlen); + PQCLEAN_RAINBOWVCLASSIC_CLEAN_hash_msg(seed + prng_seedlen, 48 - (unsigned)prng_seedlen, (const unsigned char *)prng_seed, prng_seedlen); + } + + randombytes_init_with_state(ctx, seed); + + return 0; +} + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen(prng_t *ctx, unsigned char *out, unsigned long outlen) { + return randombytes_with_state(ctx, out, outlen); +} diff --git a/crypto_sign/rainbow/rainbowV-classic/clean/utils_prng.h b/crypto_sign/rainbow/rainbowV-classic/clean/utils_prng.h new file mode 100644 index 00000000..c0e58288 --- /dev/null +++ b/crypto_sign/rainbow/rainbowV-classic/clean/utils_prng.h @@ -0,0 +1,18 @@ +#ifndef _UTILS_PRNG_H_ +#define _UTILS_PRNG_H_ +/// @file utils_prng.h +/// @brief the interface for adapting PRNG functions. 
+/// +/// + +#include "randombytes.h" + +typedef struct { + unsigned char Key[32]; + unsigned char V[16]; +} prng_t; + +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_set(prng_t *ctx, const void *prng_seed, unsigned long prng_seedlen); +int PQCLEAN_RAINBOWVCLASSIC_CLEAN_prng_gen(prng_t *ctx, unsigned char *out, unsigned long outlen); + +#endif // _UTILS_PRNG_H_ diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/META.yml b/crypto_sign/sphincs/sphincs-sha256-128f-robust/META.yml new file mode 100644 index 00000000..e81515cb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 16976 +testvectors-sha256: 3e7c782b25e405940160468c2d777a5ab6eb9b6cfe318efed257f3270cca8c72 +nistkat-sha256: cf7935fc0277099a7453f6c5dc54e40d5cf34fbe989909940a77a3fbbab6c42e +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..59cdd00d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128f-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj hash_sha256x8.obj thash_sha256_robustx8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/address.c new file mode 100644 index 00000000..2c49eb20 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include <stdint.h> + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/address.h new file mode 100644 index 00000000..e1d4e22c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDRESS_H + +#include <stdint.h> + +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h new file mode 100644 index 00000000..a62b3a1f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_API_H + +#include <stddef.h> +#include <stdint.h> + + + +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/fors.c new file mode 100644 index 00000000..e2b80f6c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/fors.c @@ -0,0 +1,240 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/fors.h new file mode 100644 index 00000000..9f4aabb4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_H + +#include <stdint.h> + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash.h new file mode 100644 index 00000000..bf63b7de --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include <stddef.h> +#include <stdint.h> + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_sha256.c new file mode 100644 index 00000000..8b0aa087 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include <stdint.h> +#include <string.h> + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * optrand and the leading message bytes (at most one block together) are + * staged in a local buffer, so m is only read and needs no spare room. 
Any bytes + * beyond that first block are hashed straight from m, so a long message is + * never moved or copied as a whole. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, m, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Number of PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES-sized blocks needed to hold R and pk, rounded up (the mask assumes a power-of-two block size) */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, pk, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/
+}
diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_sha256x8.c
new file mode 100644
index 00000000..d90bd977
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_sha256x8.c
@@ -0,0 +1,54 @@
+#include
+#include
+
+#include "address.h"
+#include "hashx8.h"
+#include "params.h"
+#include "sha256.h"
+#include "sha256avx.h"
+#include "sha256x8.h"
+#include "utils.h"
+
+/*
+ * 8-way parallel version of prf_addr; takes 8x as much input and output.
+ *
+ * Lane j hashes key followed by the compressed form of the 8-word address at
+ * addrx8 + 8 * j; the first PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N bytes of
+ * that digest are copied to the lane's output pointer.
+ */
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addrx8(unsigned char *out0,
+        unsigned char *out1,
+        unsigned char *out2,
+        unsigned char *out3,
+        unsigned char *out4,
+        unsigned char *out5,
+        unsigned char *out6,
+        unsigned char *out7,
+        const unsigned char *key,
+        const uint32_t addrx8[8 * 8]) {
+    unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES)];
+    unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES];
+    unsigned char *const dst[8] = {out0, out1, out2, out3, out4, out5, out6, out7};
+    unsigned char *msg[8];
+    unsigned char *digest[8];
+    unsigned int lane;
+
+    /* Lay out one key-plus-compressed-address message per lane. */
+    for (lane = 0; lane < 8; lane++) {
+        msg[lane] = bufx8 + lane * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES);
+        digest[lane] = outbufx8 + lane * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES;
+        memcpy(msg[lane], key, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N);
+        PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compress_address(msg[lane] + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, addrx8 + lane * 8);
+    }
+
+    PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256x8(digest[0], digest[1], digest[2], digest[3],
+            digest[4], digest[5], digest[6], digest[7],
+            msg[0], msg[1], msg[2], msg[3],
+            msg[4], msg[5], msg[6], msg[7],
+            PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES);
+
+    /* Each digest is truncated to PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N bytes on the way out. */
+    for (lane = 0; lane < 8; lane++) {
+        memcpy(dst[lane], digest[lane], PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N);
+    }
+}
diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_state.h
new file mode 100644
index 00000000..3c11872e
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hash_state.h
@@ -0,0 +1,33 @@
+#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_HASH_STATE_H
+#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_HASH_STATE_H
+
+/**
+ * Defines the type of the hash function state.
+ *
+ * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless!
+ *
+ * From Section 7.2.2 from the SPHINCS+ round-2 specification:
+ *
+ * Each of the instances of the tweakable hash function take PK.seed as its
+ * first input, which is constant for a given key pair – and, thus, across
+ * a single signature. This leads to a lot of redundant computation. To remedy
+ * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block.
+ * Because of the Merkle-Damgård construction that underlies SHA-256, this
+ * allows for reuse of the intermediate SHA-256 state after the initial call to
+ * the compression function which improves performance.
+ *
+ * We pass this hash state around in functions, because otherwise we need to
+ * have a global variable.
+ *
+ * We use a struct to differentiate between the x1 and x8 variants of SHA256.
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hashx8.h new file mode 100644 index 00000000..502ebf96 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/params.h new file mode 100644 index 00000000..14a7ed47 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FULL_HEIGHT 60 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N / PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256.c new file mode 100644 index 00000000..dfb4280c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) {
+    unsigned int word;
+
+    PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */
+    PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* addr[1], the highest tree address word, is dropped */
+    PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(out + 5, 4, addr[3]);
+    PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */
+    for (word = 5; word < 8; word++) {
+        PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(out + 10 + 4 * (word - 5), 4, addr[word]);
+    }
+}
+
+/**
+ * MGF1-style expansion built on SHA-256: fills 'out' with 'outlen' bytes.
+ *
+ * 'input_plus_four_bytes' holds the 'inlen'-byte input followed by four spare
+ * bytes; those four bytes are overwritten with the block counter before each
+ * hash call, so the buffer must be writable and 'inlen' + 4 bytes long.
+ */
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1(
+    unsigned char *out, unsigned long outlen,
+    unsigned char *input_plus_four_bytes, unsigned long inlen) {
+    unsigned char tail[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES];
+    unsigned long remaining = outlen;
+    unsigned long counter = 0;
+
+    /* Whole digests are hashed straight into the caller's buffer. */
+    while (remaining >= PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES) {
+        PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, counter);
+        sha256(out, input_plus_four_bytes, inlen + 4);
+        out += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES;
+        remaining -= PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES;
+        counter++;
+    }
+    /* A trailing partial digest goes through scratch space and is truncated. */
+    if (remaining > 0) {
+        PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, counter);
+        sha256(tail, input_plus_four_bytes, inlen + 4);
+        memcpy(out, tail, remaining);
+    }
+}
+
+
+/**
+ * Absorb the constant pub_seed, zero-padded to one full SHA-256 block, into a
+ * freshly initialised hash_state_seeded, which can then be reused in thash
+ **/
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) {
+    uint8_t block[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES] = {0};
+
+    memcpy(block, pub_seed, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N);
+    sha256_inc_init(hash_state_seeded);
+    sha256_inc_blocks(hash_state_seeded, block, 1);
+}
diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256.h
new file mode 100644
index 00000000..6f40d278
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256.h
@@ -0,0 +1,21 @@
+#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_H
+#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_H
+
+#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES 64
+#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES 32  /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N */
+#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES 22
+
+#include
+#include
+
+#include "sha2.h"
+
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]);
+
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1(
+    unsigned char *out, unsigned long outlen,
+    unsigned char *input_plus_four_bytes, unsigned long
inlen);
+
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed);
+
+#endif
diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256avx.c
new file mode 100644
index 00000000..29669d9f
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256avx.c
@@ -0,0 +1,296 @@
+#include
+#include
+#include
+
+#include "sha256avx.h"
+
+/*
+ * Transpose 8 vectors containing 32-bit values; the result is written back
+ * into s, which is not read again after the first stage.
+ */
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_transpose(u256 s[8]) {
+    u256 w32[8];
+    u256 w64[8];
+    int k;
+
+    /* Stage 1: interleave 32-bit words of each pair of neighbouring vectors. */
+    for (k = 0; k < 8; k += 2) {
+        w32[k] = _mm256_unpacklo_epi32(s[k], s[k + 1]);
+        w32[k + 1] = _mm256_unpackhi_epi32(s[k], s[k + 1]);
+    }
+    /* Stage 2: interleave 64-bit pairs within each group of four. */
+    for (k = 0; k < 8; k += 4) {
+        w64[k] = _mm256_unpacklo_epi64(w32[k], w32[k + 2]);
+        w64[k + 1] = _mm256_unpackhi_epi64(w32[k], w32[k + 2]);
+        w64[k + 2] = _mm256_unpacklo_epi64(w32[k + 1], w32[k + 3]);
+        w64[k + 3] = _mm256_unpackhi_epi64(w32[k + 1], w32[k + 3]);
+    }
+    /* Stage 3: recombine the 128-bit halves of the two groups. */
+    for (k = 0; k < 4; k++) {
+        s[k] = _mm256_permute2x128_si256(w64[k], w64[k + 4], 0x20);
+        s[k + 4] = _mm256_permute2x128_si256(w64[k], w64[k + 4], 0x31);
+    }
+}
+
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_clone_statex8(sha256ctxx8
*outctx, const sha256ctxx8 *inctx) {
+    memcpy(outctx, inctx, sizeof(sha256ctxx8));
+}
+
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx) {
+    ctx->s[0] = _mm256_set1_epi32((int)0x6a09e667); /* SHA-256 initial hash words, same value in every lane */
+    ctx->s[1] = _mm256_set1_epi32((int)0xbb67ae85);
+    ctx->s[2] = _mm256_set1_epi32((int)0x3c6ef372);
+    ctx->s[3] = _mm256_set1_epi32((int)0xa54ff53a);
+    ctx->s[4] = _mm256_set1_epi32((int)0x510e527f);
+    ctx->s[5] = _mm256_set1_epi32((int)0x9b05688c);
+    ctx->s[6] = _mm256_set1_epi32((int)0x1f83d9ab);
+    ctx->s[7] = _mm256_set1_epi32((int)0x5be0cd19);
+    /* Nothing buffered and nothing hashed yet. */
+    ctx->datalen = 0;
+    ctx->msglen = 0;
+}
+
+/* Absorbs len bytes from each of d0..d7. Input is appended after any partial
+ * block already buffered in ctx (ctx->datalen bytes per lane), so a stream may
+ * be split across calls at any length; each full 64-byte block is compressed. */
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx,
+        const unsigned char *d0,
+        const unsigned char *d1,
+        const unsigned char *d2,
+        const unsigned char *d3,
+        const unsigned char *d4,
+        const unsigned char *d5,
+        const unsigned char *d6,
+        const unsigned char *d7,
+        unsigned long long len) {
+    const unsigned char *in[8] = {d0, d1, d2, d3, d4, d5, d6, d7};
+    size_t i = 0, room, bytes_to_copy;
+    unsigned int lane;
+
+    while (i < len) {
+        room = 64 - ctx->datalen; /* space left in the buffered block */
+        bytes_to_copy = (size_t)len - i;
+        if (bytes_to_copy > room) {
+            bytes_to_copy = room;
+        }
+        for (lane = 0; lane < 8; lane++) { /* lane buffers are 64 bytes apart */
+            memcpy(&ctx->msgblocks[64 * lane + ctx->datalen], in[lane] + i, bytes_to_copy);
+        }
+        ctx->datalen += (unsigned int)bytes_to_copy;
+        i += bytes_to_copy;
+        if (ctx->datalen == 64) {
+            PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks);
+            ctx->msglen += 512; /* message length is tracked in bits */
+            ctx->datalen = 0;
+        }
+    }
+}
+
+void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx,
+        unsigned char *out0,
+        unsigned char *out1,
+        unsigned char *out2,
+        unsigned char *out3,
+        unsigned char *out4,
+        unsigned char *out5,
+        unsigned char *out6,
+        unsigned char *out7) {
+    unsigned int i, curlen;
+
+    // Padding
+    if (ctx->datalen < 56) {
+        for (i = 0; i < 8; ++i) {
+            curlen = ctx->datalen;
+            ctx->msgblocks[64 * i + curlen++] = 0x80;
+            while (curlen < 64) {
+                ctx->msgblocks[64 * i + curlen++] = 0x00;
+            }
+        }
+    } else {
+        for (i = 0; i < 8; ++i) {
+            curlen = ctx->datalen;
+            ctx->msgblocks[64 * i + curlen++] = 0x80;
+            while (curlen < 64) {
+                ctx->msgblocks[64 * i + curlen++] = 0x00;
+            }
+        }
+        PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks);
+        memset(ctx->msgblocks, 0, 8 * 64);
+    }
+
+    // Add length of the message to each block
+    ctx->msglen += ctx->datalen * 8;
+    for (i = 0; i < 8; i++) {
+        ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen;
+        ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8);
+        ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16);
+        ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24);
+        ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32);
+        ctx->msgblocks[64 * i +
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256avx.h new file mode 100644 index 00000000..6cbbcc7b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { /* the 64 SHA-256 round constants; SHA256ROUND_AVX reads RC[rc] */ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i /* one 256-bit vector, used below as eight 32-bit lanes */ + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) /* bitwise complement: XOR with an all-ones vector */ + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) /* unaligned 256-bit load */ +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) /* unaligned 256-bit store */ + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) /* reverses the byte order inside every 32-bit lane */ + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) /* per-lane rotate right by y bits */
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) /* per-lane rotate left by y bits */ + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) /* bitwise majority of a, b and c */ +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) /* per bit: b where a is set, c elsewhere */ + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) /* used when extending the message schedule w[] */ +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) /* used when extending the message schedule w[] */ + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); /* expands to four statements; assigns T0 and T1, which the caller must declare, and updates only d and h */ + +typedef struct SHA256state { + u256 s[8]; /* the eight chaining values; NOTE(review): presumably lane k of every word belongs to parallel hash k - confirm */ + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256x8.c new file mode 100644 index 00000000..74eac233 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { /* Seeds ctx: initialises it, then absorbs one full block made of the N bytes of pub_seed followed by zero padding, passing that same block as all eight inputs. */ + uint8_t block[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256: init, a single update of inlen bytes from each of in0..in7, then final into out0..out7 */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_update8x(&ctx, in0, in1,
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes for each of the eight inputs; the output for input j starts at outx8 + j * outlen + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES) + 4)]; /* eight slots of inlen + 4 bytes each; NOTE(review): inlen is not checked, so the array is only large enough when inlen does not exceed N + SHA256_ADDR_BYTES */ + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES]; /* scratch digests for the final, partial block */ + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output..
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); /* presumably stores the block counter i in the 4 bytes that follow input j - confirm against utils */ + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES; /* moves the write position of all eight outputs forward by one digest */ + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outlen - i *
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES); /* copies only the bytes still missing for output j; this length is zero when outlen is a multiple of the digest size */ + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256x8.h new file mode 100644 index 00000000..e8b9954e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed.
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); /* initialises ctx and absorbs one zero-padded block that starts with pub_seed */ +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sign.c new file mode 100644 index 00000000..86eef285 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. Has the callback shape that the treehash routine receives as its leaf generator.
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES]; /* holds the full WOTS public key before it is compressed */ + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); /* addr_idx selects the key pair inside the subtree given by tree_addr */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); /* compresses the WOTS public key into the leaf value */ +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SEEDBYTES. Always returns 0. + * Format sk: [SK_SEED || SK_PRF ||
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); /* pk begins with PUB_SEED, the third N-byte field of sk */ + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); /* the root is written straight into the fourth N-byte field of sk; auth_path is scratch and discarded */ + + memcpy(pk + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); /* the same root becomes the second half of pk */ + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_CRYPTO_SEEDBYTES); /* NOTE(review): this stack copy of the secret seed is not wiped before returning */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Writes a detached signature of m to sig, sets *siglen to PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES and always returns 0. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; /* the last two N-byte fields of sk are used as the public key */ + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes.
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; /* skip the N-byte randomization value written at the start of the signature */ + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); /* root is overwritten with this subtree root and is the value signed in the next iteration */ + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + + /* Update the indices for the next layer.
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT) - 1)); /* the low TREE_HEIGHT bits of tree pick the leaf in the next layer */ + tree = tree >> PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. Returns 0 when the recomputed root equals the root stored in pk, and -1 when siglen is not PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES or the roots differ. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); /* no secret seed is available when verifying, so NULL is passed */ + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N is a result of the hash domain separator.
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree.
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Writes the signature followed by the message to sm, sets *smlen to the signature length plus mlen and always returns 0. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES, m, mlen); /* memmove, so the copy stays well defined even if m overlaps the destination */ + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. On success copies the message to m, stores its length in *mlen and returns 0; on failure zeroes smlen bytes of m, sets *mlen to 0 and returns -1. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES.
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); /* NOTE(review): clears smlen bytes, so m must be able to hold smlen bytes and not only the message - confirm with callers */ + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash.h new file mode 100644 index 00000000..43ea918b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash_sha256_robust.c new file mode 100644 index
00000000..f2c72de0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash_sha256_robust.c @@ -0,0 +1,78 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N bytes. Writes the first N bytes of the resulting SHA-256 digest to out. buf is caller-provided scratch of at least N + SHA256_ADDR_BYTES + 4 + inblocks * N bytes. + */ +static void thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + 4; /* the mask lives inside buf, 4 bytes past the position where the masked input is written later */ + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; i++) { /* must run forward: byte i is written 4 bytes below the mask byte it reads, so each mask byte is consumed before it gets overwritten */ + buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); /* hashes the compressed address and the masked input; the pub_seed bytes at the start of buf are skipped here */ + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); /* the digest is truncated to N bytes */ +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; /* seed, compressed address, 4 spare bytes for MGF1, then one N-byte input block */ + thash(out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; /* same layout with two N-byte input blocks */ + thash(out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; /* same layout with WOTS_LEN input blocks */ + thash(out, buf, in, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; /* same layout with FORS_TREES input blocks */ + thash(out, buf, in, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash_sha256_robustx8.c
b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash_sha256_robustx8.c new file mode 100644 index 00000000..e3be245a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thash_sha256_robustx8.c @@ -0,0 +1,156 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + uint8_t *out4, + uint8_t *out5, + uint8_t *out6, + uint8_t *out7, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + const uint8_t *in4, + const uint8_t *in5, + const uint8_t *in6, + const uint8_t *in7, + unsigned int inblocks, + const uint8_t *pub_seed, + uint32_t addrx8[8 * 8], + uint8_t *bufx8, + uint8_t *bitmaskx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. 
*/ + + for (i = 0; i < 8; i++) { + memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + pub_seed, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + + i * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + addrx8 + i * 8); + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + ); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; i++) { + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 0 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 1 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 2 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 3 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 4 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in4[i] ^ bitmaskx8[i + 4 * (inblocks * 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 5 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 6 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 7 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)] = + in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; + } + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_update8x(&ctx, + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + 
PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N), + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out3, 
outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); +} + +#define thash_size_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; \ + uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ + } + +thash_size_variant(1, 1) +thash_size_variant(2, 2) +thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN) +thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_TREES) + +#undef thash_size_variant diff --git 
a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thashx8.h new file mode 100644 index 00000000..10a5a1cb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utils.c new file mode 100644 index 00000000..c3b52cc2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const 
unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utils.h new file mode 100644 index 00000000..18175fa0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utilsx8.c new file 
mode 100644 index 00000000..470821ae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } +} + +/* The wrappers below ensure we used fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t 
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utilsx8.h new file mode 100644 index 00000000..69aa1efe --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/wots.c new file mode 100644 index 00000000..7ce8538c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w, taking the most + * significant bits of each input byte first. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w) and writes its + * WOTS_LEN2 base_w digits to csum_base_w. */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. + * The outer '% 8' keeps the shift at 0, not 8, when WOTS_LEN2 * WOTS_LOGW + * is already a multiple of 8 and there are no empty bits to skip. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The loop bound rounds WOTS_LEN up to a multiple of 8, so the last + iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible, and only chains with an index below WOTS_LEN + are copied from pkbuf into pk. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/wots.h new file mode 100644 index 00000000..a0c7d522 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..87e8e4e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128f-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/address.c new file mode 100644 index 00000000..eff46d7f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/address.h new file mode 100644 index 00000000..33aae431 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/api.h new file mode 100644 index 00000000..cda169d1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/fors.c new file mode 100644 index 00000000..82d7d3db --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/fors.h new file mode 100644 index 00000000..ee11d3ea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash.h new file mode 100644 index 00000000..8d2071f2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash_sha256.c new file mode 100644 index 00000000..65e79907 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed (see seed_state) so that it can be + reused by later hash calls; sk_seed is not needed for SHA256. */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * The message m is only read: optrand and the leading message bytes are + * copied into a local buffer holding at most one SHA-256 block, and any + * remaining message bytes are hashed directly from m,
so no spare room in + * front of m is required. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, m, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; i++) {
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES + 4]; /* 4 extra bytes: MGF1 appends its counter after the seed */ + + /* Number of PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES-sized blocks needed to hold R and pk, rounded up */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N +
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/params.h new file mode 100644 index 00000000..e071c6b1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FULL_HEIGHT 60 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES 30 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N / PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sha256.c new file mode 100644 index 00000000..d1856e51 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sha256.h new file mode 100644 index 00000000..8a3b16bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sign.c new file mode 100644 index 00000000..22d82f40 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair from a seed of PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SEEDBYTES bytes. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed.
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/thash.h new file mode 100644 index 00000000..84383feb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/thash_sha256_robust.c new file mode 100644 
index 00000000..27d59115 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/thash_sha256_robust.c @@ -0,0 +1,82 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the 
stack */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash( + out, buf, in, 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/utils.c new file mode 100644 index 00000000..320e7943 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/utils.h new file mode 100644 index 00000000..7c950838 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/wots.c new 
file mode 100644 index 00000000..b46d746a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w) and writes its WOTS_LEN2 base_w digits to csum_base_w. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 keeps the shift at 0 (not 8) when WOTS_LEN2 * WOTS_LOGW is a multiple of 8. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/wots.h new file mode 100644 index 00000000..499113e4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/META.yml b/crypto_sign/sphincs/sphincs-sha256-128f-simple/META.yml new file mode 100644 index 00000000..2ee7637b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 16976 +testvectors-sha256: 5ce16422e028eb7a6198d0a276a1760a6bbcd4ba9457ddbbfd5e08f34985c0ce +nistkat-sha256: 4375bc4276fa44654979db0da886ba5cf754011db268fc63fa7584d50f5dfb63 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..46808d05 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128f-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj hash_sha256x8.obj thash_sha256_simplex8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/address.c new file mode 100644 index 00000000..f467aeb5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/address.h new file mode 100644 index 00000000..44dbd213 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h new file mode 100644 index 00000000..f6deb162 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/fors.c new file mode 100644 index 00000000..b86ae830 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/fors.h new file mode 100644 index 00000000..ac6897ac --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash.h new file mode 100644 index 00000000..579f1949 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_sha256.c new file mode 100644 index 00000000..27ad6331 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, m, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, pk, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_sha256x8.c new file mode 100644 index 00000000..38c7375d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_state.h new file mode 100644 index 00000000..94e632d8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hashx8.h new file mode 100644 index 00000000..f1c3e370 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/params.h new file mode 100644 index 00000000..b1c86674 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FULL_HEIGHT 60 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256.c new file mode 100644 index 00000000..4489756c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256.h new file mode 100644 index 00000000..8790182b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256avx.c new file mode 100644 index 00000000..0ee0c434 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256avx.h new file mode 100644 index 00000000..f28a9e7c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256x8.c new file mode 100644 index 00000000..36767bca --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256x8.h new file mode 100644 index 00000000..d174694d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sign.c new file mode 100644 index 00000000..12409917 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash.h new file mode 100644 index 00000000..e12735ee --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash_sha256_simple.c new file mode 100644 index 
00000000..2f900c56 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state 
*hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash_sha256_simplex8.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash_sha256_simplex8.c new file mode 100644 index 00000000..127e3128 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thash_sha256_simplex8.c @@ -0,0 +1,129 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned 
char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + uint8_t *bufx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + addrx8 + i * 8); + } + + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 0 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 1 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 2 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 3 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 4 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 5 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 6 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 7 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_update8x(&ctx, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * 
PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N), + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); +} + +#define thashx8_variant_impl(name, size) \ + void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char 
*out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, state_seeded); \ + } + +thashx8_variant_impl(1, 1) +thashx8_variant_impl(2, 2) +thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN) +thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_TREES) + +#undef thashx8_variant_impl diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thashx8.h new file mode 100644 index 00000000..6b91ada3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const 
unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utils.c new file mode 100644 index 00000000..795c553f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Writes the low-order 'outlen' bytes of 'in' to 'out' in big-endian byte order; when 'outlen' exceeds the width of 'in', the leading bytes are zero. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Fill 'out' from its last byte to its first, so that the least significant byte of 'in' ends up last. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Interprets the 'inlen' bytes at 'in' as a big-endian unsigned integer and returns it; the shift count 8 * (inlen - 1) must stay below the width of unsigned long long, so 'inlen' may be at most 8 when that type has 64 bits. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index.
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + 
unsigned int heights[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utils.h new file mode 100644 index 00000000..a3eba0d4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utilsx8.c new file mode 100644 index 00000000..99673117 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* Copy the new node into the auth path of every lane j whose path sibling at this height it is. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utilsx8.h new file mode 100644 index 00000000..d63de069 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/wots.c new file mode 100644 index 00000000..ca4af0c5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w, writing out_len digits to output and taking the most significant bits of each input byte first. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over the WOTS_LEN1 base-w digits in msg_base_w and writes it to csum_base_w as WOTS_LEN2 base-w digits. */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 keeps the shift at 0 (rather than 8) when the checksum digits already fill whole bytes. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths: WOTS_LEN1 message digits followed by WOTS_LEN2 checksum digits. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation.
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'; each of the WOTS_LEN chains is started from its secret value and advanced by the chain length derived from the message, and the chain and hash fields of addr are overwritten along the way.
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key by advancing every signature chain from its message-derived position by the remaining WOTS_W - 1 - lengths[i] steps. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/wots.h new file mode 100644 index 00000000..dd8c2c5c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..350ddf6b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128f-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/address.c new file mode 100644 index 00000000..6e5311ff --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { /* serialises the eight address words into 32 bytes, 4 bytes per word */ + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); /* byte order is whatever ull_to_bytes produces (defined in utils) */ + } +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { /* layer lives in word 0 */ + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { /* 64-bit tree index is split over words 2 (high half) and 3 (low half); word 1 is cleared */ + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { /* type lives in word 4 */ + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { /* copies words 0..3 (layer and tree); words 4..7 of out are left untouched */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses.
*/ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { /* key pair index lives in word 5 */ + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { /* copies words 0..3 and 5; words 4, 6 and 7 of out are left untouched */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { /* chain index lives in word 6 */ + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { /* hash index lives in word 7 */ + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). They write the same words (6 and 7) as set_chain_addr and set_hash_addr above. */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { /* tree height lives in word 6 */ + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { /* tree index lives in word 7 */ + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/address.h new file mode 100644 index 00000000..79a572be --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void
PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/api.h new file mode 100644 index 00000000..cdd6b60d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/fors.c new file mode 100644 index 00000000..22c50b0c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { /* writes prf_addr(sk_seed, fors_leaf_addr) to sk */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { /* writes thash_1 of sk, under pub_seed and fors_leaf_addr, to leaf */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { /* builds the leaf with index addr_idx from sk_seed; passed as a callback by fors_sign */ + uint32_t fors_leaf_addr[8] = {0}; /* fresh address, all words start at zero */ + + /* Only copy the parts that must be kept in fors_leaf_addr; type and tree index are then set explicitly. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); /* the secret value is staged in the leaf buffer ... */ + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); /* ... and then hashed in place */ +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; /* running bit position inside m */ + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; /* bit (offset & 7) of byte (offset >> 3), least-significant bit first, becomes bit j of the index */ + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES]; /* selected leaf per tree, filled by message_to_indices */ + unsigned char roots[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; /* one N-byte root per tree */ + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 <<
PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT); /* tree i owns the index range starting at i * 2^FORS_HEIGHT */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; /* step past the N-byte secret value just written */ + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT; /* step past FORS_HEIGHT path nodes of N bytes each */ + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES]; /* selected leaf per tree, filled by message_to_indices */ + unsigned char roots[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; /* one N-byte root per tree */ + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; /* scratch for the leaf recomputed in each iteration */ + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT); /* tree i owns the index range starting at i * 2^FORS_HEIGHT */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; /* step past the N-byte secret value just consumed */ + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT; /* step past FORS_HEIGHT path nodes of N bytes each */ + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/fors.h new file mode 100644 index 00000000..4e2aba01 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash.h new file mode 100644 index 00000000..1e6eef65 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash_sha256.c new file mode 100644 index 00000000..0420a41c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed by calling seed_state(); + sk_seed is not needed for that and is deliberately ignored. */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state: hands hash_state_seeded to sha256_inc_ctx_release() */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N bytes and an address: out receives the first N bytes of SHA-256 over key followed by the compressed address + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES]; /* hash input: key followed by the compressed address */ + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; /* full digest; only its first N bytes are returned */ + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); /* truncate the digest to N bytes */ + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N space + * available in front of the pointer, i.e. before the message to use for the + * prefix.
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, m, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; /* outer key block: key XOR 0x5c */ + } + memset(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); /* outer hash over the outer key block followed by the inner digest */ + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); /* R is the first N bytes of the HMAC output */ + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + * + * The output is MGF1-SHA256 expanded from the seed SHA-256(R || pk || m) and + * cut into FORS_MSG_BYTES of digest, TREE_BYTES of tree index and LEAF_BYTES + * of leaf index; the two indices are masked down to TREE_BITS and LEAF_BITS bits. + * hash_state_seeded is not used by this instantiation. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES + 4]; /* digest plus 4 spare bytes for the MGF1 counter */ + + /* Number of SHA-256 blocks needed to hold R || pk: N + PK_BYTES rounded up to a multiple of PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, divided by the block size */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + 
PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; /* scratch for R || pk plus the start of m */ + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; /* read cursor into the MGF1 output */ + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES); + + /* If R || pk || m does not fill inbuf completely, hash everything in one go */ + if (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES + mlen); + } + /* Otherwise fill inbuf with the start of m and absorb it, so that finalize only uses the rest of the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES; /* skip the part of m already absorbed */ + mlen -= PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By first hashing the input down to a seed and then expanding the seed, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BITS); /* keep only the low TREE_BITS bits */ + bufp += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_LEAF_BITS); /* keep only the low LEAF_BITS bits */ + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 of the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/params.h new file mode 100644 index 00000000..2809f0ae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FULL_HEIGHT 60 +/* Number of subtree layers in the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D 20 +/* FORS tree dimensions: height of each tree and number of trees. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES 30 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity: size of a hash address in bytes (addresses are passed around as uint32_t[8]). */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LOGW 4 /* log2 of WOTS_W: bits per WOTS digit */ + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LOGW) /* number of base-w digits in an N-byte value */ + +/* PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute: len_1 = 32 and w = 16 give floor(log(480) / log(16)) + 1 = 3 */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size: height of one subtree, i.e. the hypertree height split evenly over the D layers. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D) + +/* FORS parameters: message bytes consumed (FORS_HEIGHT bits per tree, rounded up to whole bytes), signature size and public key size. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N) /* signature: R, FORS signature, then per layer one WOTS signature and one auth path */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N) /* PUB_SEED || root */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_PK_BYTES) /* SK_SEED || SK_PRF || pk */ + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sha256.c new file mode 100644 index 00000000..03d0eb99 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); /* addr[5], addr[6] and addr[7] are kept in full; 22 bytes are written in total */ +} + +/** + * MGF1 with SHA-256: concatenates SHA-256(input || counter) for counter = 0, 1, ... + * with the counter encoded as 4 big-endian bytes, until outlen bytes are written. + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes; the four counter bytes of the input buffer are overwritten. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While at least one more full block of SHA256 output fits, hash straight into out. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); /* append the block counter */ + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); /* hash into scratch space so that out is not overrun */ + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); /* copy only the bytes still missing */ + } +} + + +/** + * Absorbs the constant pub_seed using one round of the compression function: + * the N bytes of pub_seed are zero-padded to one full SHA-256 block. + * This initializes hash_state_seeded, which can then be reused in thash. + **/ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; /* zero padding up to the block size */ + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sha256.h new file mode 100644 index 00000000..9536d88b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES 22 /* size of a compressed address */ + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sign.c new file mode 100644 index 00000000..49320f50 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS public key + * of key pair addr_idx in the subtree identified by tree_addr, then computes + * the leaf by hashing that public key down to a single value with thash. + * Has the callback signature that the treehash routine expects for gen_leaf. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); /* full WOTS public key */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); /* compress the public key into the leaf */ +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes. + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes. + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes. + */ +size_t PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES. Always returns 0. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D - 1); /* the top layer is layer D - 1 */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); /* pk starts with PUB_SEED */ + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree and store it in the last N bytes of sk. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); /* pk ends with the root */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair from CRYPTO_SEEDBYTES bytes obtained from randombytes. + * Always returns 0. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; /* the public key is stored inside sk */ + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + /* Compute the digest randomization value R; it is written to the first N bytes of the signature. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; /* step past R */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS; root is then the value signed by the bottom WOTS layer. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature on root, the output of the layer below. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf; root is replaced by the root of this subtree. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer: the low TREE_HEIGHT bits of tree select the leaf, the remaining bits select the tree. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES; /* the signature length is fixed */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES) { + return -1; /* signatures have a fixed length; reject anything else before reading sig */ + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); /* NULL is passed in place of the secret seed, which a verifier does not have */ + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; /* step past R */ + + /* wots_addr was zero-initialized, so its layer is already 0 and there is no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); /* recompute the FORS public key into root */ + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_BYTES; + + /* For each layer of subtrees, starting at layer 0.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node by compressing the WOTS public key with thash. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; /* step past the auth path */ + + /* Update the indices for the next layer: the low TREE_HEIGHT bits of tree select the leaf, the remaining bits select the tree. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Writes the signature followed by the message to sm and sets *smlen to + * their combined length. Always returns 0. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES, m, mlen); /* memmove rather than memcpy, presumably so that m may overlap sm - TODO confirm */ + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + * On success copies the message to m, sets *mlen to its length and returns 0; + * on failure zeroes m, sets *mlen to 0 and returns -1. + */ +int PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); /* NOTE(review): zeroes smlen bytes, so m must have room for smlen bytes - confirm against the API contract */ + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES; /* everything after the signature is the message */ + + if (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); /* do not hand back an unverified message */ + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/thash.h new file mode 100644 index 00000000..353f27d0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/thash_sha256_simple.c new file mode 100644 
index 00000000..ef0d3a93 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N bytes. + * Continues the precomputed hash state with the compressed address followed + * by the input and writes the first N bytes of the digest to out. + * buf is scratch space supplied by the caller; it must hold + * SHA256_ADDR_BYTES + inblocks * N bytes. + */ +static void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed; work on a copy so the shared state can be reused */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); /* the input is copied before any output is written, so out may alias in */ + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); /* truncate the digest to N bytes */ +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); /* in is a single N-byte block */ +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const 
sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); /* in is two concatenated N-byte blocks */ +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); /* in is WOTS_LEN concatenated N-byte blocks */ +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); /* in is FORS_TREES concatenated N-byte blocks */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/utils.c new file mode 100644 index 00000000..e7e70b35 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. Only the low outlen bytes of in are written; higher bytes are dropped. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + * NOTE(review): the shift amount is 8 * (inlen - 1 - i), so inlen must not + * exceed the width of unsigned long long in bytes. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); /* in[0] is the most significant byte */ + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + * auth_path holds tree_height nodes of N bytes each, ordered from the leaf + * level upwards. tree_height must be at least 1, because the loop bound + * tree_height - 1 is computed on an unsigned value. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; /* left child followed by right child */ + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Hash the current pair into its parent and store the parent on the side given by the parity of the parent index; the next auth_path node fills the other half. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node and the result is written straight to root. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/utils.h new file mode 100644 index 00000000..6d919a5f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/wots.c new 
file mode 100644 index 00000000..f67a4dbc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over the WOTS_LEN1 base-w digits in msg_base_w and writes it to csum_base_w as WOTS_LEN2 base-w digits. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum: the sum of (WOTS_W - 1 - digit) over all message digits. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. NOTE(review): the shift amount is 8, not 0, when WOTS_LEN2 * WOTS_LOGW is a multiple of 8 -- confirm that cannot happen for this parameter set. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths: WOTS_LEN1 message digits followed by the checksum digits written by wots_checksum. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/wots.h new file mode 100644 index 00000000..e41546d5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128f-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/META.yml b/crypto_sign/sphincs/sphincs-sha256-128s-robust/META.yml new file mode 100644 index 00000000..7c66be01 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 8080 +testvectors-sha256: 29d6d0dd732078d177779a61b7654bbe59fcf2ecb9bcd2ade8391791a6570a63 +nistkat-sha256: 4ddcad5141217340f9f28afdcf25cc236d7975bcfb41b39660e84568a9a461fe +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..1669d5b7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128s-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj hash_sha256x8.obj thash_sha256_robustx8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/address.c new file mode 100644 index 00000000..301c88f8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/address.h new file mode 100644 index 00000000..b6d9b30d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h new file mode 100644 index 00000000..50be23a7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/fors.c new file mode 100644 index 00000000..fdcdcfc9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/fors.h new file mode 100644 index 00000000..9e39b3e2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash.h new file mode 100644 index 00000000..602390ba --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include <stddef.h> +#include <stdint.h> + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_sha256.c new file mode 100644 index 00000000..3bb03320 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include <stdint.h> +#include <string.h> + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, m, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, pk, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_sha256x8.c new file mode 100644 index 00000000..c71a6054 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_state.h new file mode 100644 index 00000000..835b0ddd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hashx8.h new file mode 100644 index 00000000..68617c21 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_HASHX8_H + +#include <stdint.h> + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/params.h new file mode 100644 index 00000000..b12cd741 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES 10 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N / PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256.c new file mode 100644 index 00000000..f722e3ec --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256.h new file mode 100644 index 00000000..51bf8c86 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256avx.c new file mode 100644 index 00000000..28f4fda8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + /* Absorb len bytes from each of d0..d7 (one input per lane). Data is appended at ctx->datalen, so a partial block left by an earlier call is kept; assumes ctx->datalen < 64 on entry. */ + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64 - ctx->datalen) 
{ + bytes_to_copy = 64 - ctx->datalen; /* never write past the end of a lane's 64-byte block */ + } + memcpy(&ctx->msgblocks[64 * 0 + ctx->datalen], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1 + ctx->datalen], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2 + ctx->datalen], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3 + ctx->datalen], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4 + ctx->datalen], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5 + ctx->datalen], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6 + ctx->datalen], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7 + ctx->datalen], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256avx.h new file mode 100644 index 00000000..278f5c4d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) +/* One SHA-256 round on all 8 lanes; updates d and h in place. Needs u256 temporaries T0 and T1 in the calling scope. Wrapped in do/while(0) so it expands to a single statement. */ +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) do { \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); } while (0) + +typedef struct SHA256state { + u256 s[8]; /* s[k] holds state word k for all 8 lanes */ + uint8_t msgblocks[8 * 64]; /* one 64-byte block buffer per lane, lane j at offset 64 * j */ + unsigned int datalen; /* bytes currently buffered in each lane's block */ + uint64_t msglen; /* message length in bits already compressed (same for every lane) */ +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256x8.c new file mode 100644 index 00000000..257c93f0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256x8.h new file mode 100644 index 00000000..dd9e5909 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sign.c new file mode 100644 index 00000000..e9472453 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash.h new file mode 100644 index 00000000..72406b66 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash_sha256_robust.c new file mode 100644 index 
00000000..b9af100e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash_sha256_robust.c @@ -0,0 +1,78 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N bytes. + */ +static void thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + thash(out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + thash(out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash_sha256_robustx8.c 
b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash_sha256_robustx8.c new file mode 100644 index 00000000..d3f45a50 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thash_sha256_robustx8.c @@ -0,0 +1,156 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + uint8_t *out4, + uint8_t *out5, + uint8_t *out6, + uint8_t *out7, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + const uint8_t *in4, + const uint8_t *in5, + const uint8_t *in6, + const uint8_t *in7, + unsigned int inblocks, + const uint8_t *pub_seed, + uint32_t addrx8[8 * 8], + uint8_t *bufx8, + uint8_t *bitmaskx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. 
*/ + + for (i = 0; i < 8; i++) { + memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + pub_seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + + i * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + addrx8 + i * 8); + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + ); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i++) { + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in4[i] ^ bitmaskx8[i + 4 * (inblocks * 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] = + in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; + } + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(&ctx, + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + 
PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N), + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out3, 
outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); +} + +#define thash_size_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; \ + uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ + } + +thash_size_variant(1, 1) +thash_size_variant(2, 2) +thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN) +thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES) + +#undef thash_size_variant diff --git 
a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thashx8.h new file mode 100644 index 00000000..4c4a1bc0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utils.c new file mode 100644 index 00000000..d2dd13b3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const 
unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utils.h new file mode 100644 index 00000000..367900dc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utilsx8.c new file 
mode 100644 index 00000000..64bca5eb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } +} + +/* The wrappers below ensure we used fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t 
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utilsx8.h new file mode 100644 index 00000000..a472def1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/wots.c new file mode 100644 index 00000000..6f21fca8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N); + } + } + } +} + +/** + * Takes a n-byte message and the 32-byte sk_see to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/wots.h new file mode 100644 index 00000000..ddc0b90a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +http://creativecommons.org/publicdomain/zero/1.0/ diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..c4910fcf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128s-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/address.c new file mode 100644 index 00000000..5b946b75 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/address.h new file mode 100644 index 00000000..ae9a7ea8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/api.h new file mode 100644 index 00000000..9db23cf5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/fors.c new file mode 100644 index 00000000..b6490e2e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/fors.h new file mode 100644 index 00000000..c726541b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash.h new file mode 100644 index 00000000..5aefd667 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash_sha256.c new file mode 100644 index 00000000..744670a8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed via seed_state() so the state can be + handed to later hash calls; sk_seed is not used by this SHA-256 variant. */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state by releasing the underlying SHA-256 context */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr) as SHA-256 over key || compressed address, truncated to PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N bytes; key is PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N bytes + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * The optrand prefix and the leading message bytes are copied into a local + * buffer before hashing, so m is only read and needs no spare room in front + * of the pointer.
Only the first PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N bytes of the HMAC + * output are written to R; hash_state_seeded is unused. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256; the inner pad is sk_prf XOR 0x36, filled with 0x36 up to one block */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, m, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; i++) {
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/params.h new file mode 100644 index 00000000..8690edb1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES 10 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N / PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sha256.c new file mode 100644 index 00000000..5e49d372 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sha256.h new file mode 100644 index 00000000..9bbf592f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sign.c new file mode 100644 index 00000000..d5a31191 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SEEDBYTES. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/thash.h new file mode 100644 index 00000000..e44c2884 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/thash_sha256_robust.c new file mode 100644 
index 00000000..a10b1a82 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/thash_sha256_robust.c @@ -0,0 +1,82 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the 
stack */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash( + out, buf, in, 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/utils.c new file mode 100644 index 00000000..61c3a669 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/utils.h new file mode 100644 index 00000000..693eecae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/wots.c new 
file mode 100644 index 00000000..7eca64cc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over the WOTS_LEN1 base_w digits in msg_base_w and writes it to csum_base_w as WOTS_LEN2 base_w digits. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Left-align the checksum so the unused padding bits are the least significant ones; the outer % 8 makes the shift 0 (not 8) when LEN2 * LOGW is already a whole number of bytes. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/wots.h new file mode 100644 index 00000000..d45383db --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/META.yml b/crypto_sign/sphincs/sphincs-sha256-128s-simple/META.yml new file mode 100644 index 00000000..b05b52b6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 8080 +testvectors-sha256: edf1b76246ac560558d7938f8ac7bbf820f1e697ef4f5b5e1962f04fadb84a76 +nistkat-sha256: 8ae7a91b321cd18bd855710eea9d13deea1a53bb7858baee5f77d0237d1897eb +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M.
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..ec4f2083 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128s-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj hash_sha256x8.obj thash_sha256_simplex8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/address.c new file mode 100644 index 00000000..5e9437ea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/address.h new file mode 100644 index 00000000..afc84c24 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h new file mode 100644 index 00000000..f676864e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/fors.c new file mode 100644 index 00000000..b99bd679 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/fors.h new file mode 100644 index 00000000..f0bdc3b4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash.h new file mode 100644 index 00000000..36d143bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_sha256.c new file mode 100644 index 00000000..62625cd2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, m, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, pk, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_sha256x8.c new file mode 100644 index 00000000..06a5c1e6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_state.h new file mode 100644 index 00000000..17e1e96b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hashx8.h new file mode 100644 index 00000000..b35e898d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/params.h new file mode 100644 index 00000000..e6b3b5bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES 10 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256.c new file mode 100644 index 00000000..b91a2eee --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256.h new file mode 100644 index 00000000..8d130b0e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256avx.c new file mode 100644 index 00000000..485a7d7e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) {
+    memcpy(outctx, inctx, sizeof(sha256ctxx8));
+}
+
+/* Start eight fresh SHA-256 computations: every 32-bit lane of s[k] receives
+ * word k of the SHA-256 initial hash value and both counters are cleared. */
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx) {
+    ctx->s[0] = _mm256_set1_epi32((int)0x6a09e667);
+    ctx->s[1] = _mm256_set1_epi32((int)0xbb67ae85);
+    ctx->s[2] = _mm256_set1_epi32((int)0x3c6ef372);
+    ctx->s[3] = _mm256_set1_epi32((int)0xa54ff53a);
+    ctx->s[4] = _mm256_set1_epi32((int)0x510e527f);
+    ctx->s[5] = _mm256_set1_epi32((int)0x9b05688c);
+    ctx->s[6] = _mm256_set1_epi32((int)0x1f83d9ab);
+    ctx->s[7] = _mm256_set1_epi32((int)0x5be0cd19);
+
+    ctx->datalen = 0;
+    ctx->msglen = 0;
+}
+
+/**
+ * Absorb len bytes from each of the eight input streams d0..d7 into ctx.
+ *
+ * Stream j is buffered in ctx->msgblocks[64 * j .. 64 * j + 63]. New bytes are
+ * appended behind the ctx->datalen bytes that are already buffered, and the
+ * 8-way compression function runs each time the eight blocks are full, so the
+ * function may be called any number of times with arbitrary lengths.
+ * ctx->datalen is always below 64 on entry and on return.
+ */
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx,
+        const unsigned char *d0,
+        const unsigned char *d1,
+        const unsigned char *d2,
+        const unsigned char *d3,
+        const unsigned char *d4,
+        const unsigned char *d5,
+        const unsigned char *d6,
+        const unsigned char *d7,
+        unsigned long long len) {
+    const unsigned char *const in[8] = {d0, d1, d2, d3, d4, d5, d6, d7};
+    size_t i = 0;
+    size_t bytes_to_copy;
+    size_t room;
+    size_t lane;
+
+    while (i < len) {
+        /* Never copy past the end of the 64-byte block that is being filled. */
+        room = 64 - (size_t)ctx->datalen;
+        bytes_to_copy = (size_t)len - i;
+        if (bytes_to_copy > room) {
+            bytes_to_copy = room;
+        }
+        /* Append behind the bytes that are already buffered in each lane. */
+        for (lane = 0; lane < 8; lane++) {
+            memcpy(&ctx->msgblocks[64 * lane + ctx->datalen], in[lane] + i, bytes_to_copy);
+        }
+        ctx->datalen += (unsigned int)bytes_to_copy;
+        i += bytes_to_copy;
+        if (ctx->datalen == 64) {
+            PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks);
+            ctx->msglen += 512;
+            ctx->datalen = 0;
+        }
+    }
+}
+
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx,
+        unsigned char *out0,
+        unsigned char *out1,
+        unsigned char *out2,
+        unsigned char *out3,
+        unsigned char *out4,
+        unsigned char *out5,
+        unsigned char *out6,
+        unsigned char *out7) {
+    unsigned int i, curlen;
+
+    // Padding
+    if (ctx->datalen < 56) {
+        for (i = 0; i < 8; ++i) {
+            curlen = ctx->datalen;
+            ctx->msgblocks[64 * i + curlen++] = 0x80;
+            while (curlen < 64) {
+                ctx->msgblocks[64 * i + curlen++] = 0x00;
+            }
+        }
+    } else {
+        for (i = 0; i < 8; ++i) {
+            curlen = ctx->datalen;
+            ctx->msgblocks[64 * i + curlen++] = 0x80;
+            while (curlen < 64) {
+                ctx->msgblocks[64 * i + curlen++] = 0x00;
+            }
+        }
+        PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks);
+        memset(ctx->msgblocks, 0, 8 * 64);
+    }
+
+    // Add length of the message to each block
+    ctx->msglen += ctx->datalen * 8;
+    for (i = 0; i < 8; i++) {
+        ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen;
+        ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8);
+        ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16);
+        ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24);
+        ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32);
+        ctx->msgblocks[64 * i +
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256avx.h new file mode 100644 index 00000000..e3acbfc2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) /* rotate every 32-bit lane left by y bits */
+
+#define XOR3(a, b, c) XOR(XOR(a, b), c) /* three-input XOR */
+
+#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) /* lane-wise 32-bit sum of three operands */
+#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) /* lane-wise 32-bit sum of four operands */
+#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) /* lane-wise 32-bit sum of five operands */
+
+#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) /* bitwise majority of a, b and c */
+#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) /* bitwise choose: bits of b where a is set, bits of c elsewhere */
+
+#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) /* round function applied to e in SHA256ROUND_AVX */
+#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) /* round function applied to a in SHA256ROUND_AVX */
+
+#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) /* message-schedule function, used on w[t - 2] */
+#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) /* message-schedule function, used on w[t - 15] */
+
+#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) /* one round with constant RC[rc] and schedule word w; overwrites d and h; needs u256 variables T0 and T1 in the caller's scope; expands to several statements, so use it only as a full statement */ \
+    T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \
+    (d) = ADD32(d, T0); \
+    T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \
+    (h) = ADD32(T0, T1);
+
+typedef struct SHA256state { /* state of eight SHA-256 computations that advance in lock-step */
+    u256 s[8]; /* s[k] holds chaining word k of all eight instances, one 32-bit lane per instance */
+    uint8_t msgblocks[8 * 64]; /* eight 64-byte block buffers; instance j uses bytes 64 * j .. 64 * j + 63 */
+    unsigned int datalen; /* number of bytes currently buffered per instance */
+    uint64_t msglen; /* bits already compressed per instance; grows by 512 per block */
+} sha256ctxx8;
+
+
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_transpose(u256 s[8]); /* in-place 8x8 transpose of 32-bit values */
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); /* NOTE(review): no definition appears in sha256avx.c in this patch - confirm it exists elsewhere or drop the prototype */
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx); /* reset ctx to the SHA-256 initial state in all eight lanes */
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, /* absorb len bytes from each of d0..d7 */
+        const unsigned char *d0,
+        const unsigned char *d1,
+        const unsigned char *d2,
+        const unsigned char *d3,
+        const unsigned char *d4,
+        const unsigned char *d5,
+        const unsigned char *d6,
+        const unsigned char *d7,
+        unsigned long long len);
+void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, /* pad, finish and store one 32-byte digest to each of out0..out7 */
+        unsigned char *out0,
+        unsigned char *out1,
+        unsigned char *out2,
+        unsigned char *out3,
+        unsigned char *out4,
+        unsigned char *out5,
+        unsigned char *out6,
+        unsigned char *out7);
+
+void
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256x8.c new file mode 100644 index 00000000..23fc7e52 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256x8.h new file mode 100644 index 00000000..05763581 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sign.c new file mode 100644 index 00000000..d0bf4e3a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash.h new file mode 100644 index 00000000..0b9eb1c1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash_sha256_simple.c new file mode 100644 index 
00000000..df280608 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state 
*hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash_sha256_simplex8.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash_sha256_simplex8.c new file mode 100644 index 00000000..4bdb9209 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thash_sha256_simplex8.c @@ -0,0 +1,129 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned 
char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + uint8_t *bufx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + addrx8 + i * 8); + } + + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 0 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 1 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 2 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 3 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 4 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 5 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 6 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 7 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_update8x(&ctx, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * 
PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N), + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); +} + +#define thashx8_variant_impl(name, size) \ + void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char 
*out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, state_seeded); \ + } + +thashx8_variant_impl(1, 1) +thashx8_variant_impl(2, 2) +thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN) +thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_TREES) + +#undef thashx8_variant_impl diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thashx8.h new file mode 100644 index 00000000..3fa8f7bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const 
/**
 * Converts the value of 'in' to 'outlen' bytes in big-endian byte order.
 *
 * out:    destination buffer, at least outlen bytes.
 * outlen: number of bytes to write.
 * in:     value to serialise; its least significant byte lands in
 *         out[outlen - 1].
 *
 * Bytes are produced from the low end of 'in', so once 'in' has been shifted
 * down to zero every remaining leading byte is written as zero.
 */
void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(
    unsigned char *out, size_t outlen, unsigned long long in) {

    /* Iterate over out in decreasing order, for big-endianness. */
    for (size_t i = outlen; i > 0; i--) {
        out[i - 1] = (unsigned char)(in & 0xff);
        in >>= 8;
    }
}

/**
 * Converts the inlen bytes in 'in' from big-endian byte order to an integer.
 *
 * in:    source buffer, at least inlen bytes.
 * inlen: number of bytes to read (0 yields 0).
 *
 * Returns the big-endian value of the input. The value is accumulated by
 * shifting the running result left one byte at a time, so the shift count is
 * always 8 and never reaches the width of the type. If inlen is larger than
 * sizeof(unsigned long long), the leading bytes fall off the top and the
 * result is the value of the trailing bytes.
 */
unsigned long long PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_bytes_to_ull(
    const unsigned char *in, size_t inlen) {
    unsigned long long retval = 0;

    for (size_t i = 0; i < inlen; i++) {
        retval = (retval << 8) | in[i];
    }
    return retval;
}
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + 
unsigned int heights[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utils.h new file mode 100644 index 00000000..b6eac4e2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utilsx8.c new file mode 100644 index 00000000..94ffe293 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } +} + +/* The wrappers below ensure we used fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t 
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utilsx8.h new file mode 100644 index 00000000..6f8a4212 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/wots.c new file mode 100644 index 00000000..40ad0128 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N); + } + } + } +} + +/** + * Takes a n-byte message and the 32-byte sk_see to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/wots.h new file mode 100644 index 00000000..8946e815 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..1a81c61f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-128s-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/address.c new file mode 100644 index 00000000..f58ec0ca --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/address.h new file mode 100644 index 00000000..265cb76f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/api.h new file mode 100644 index 00000000..5f39d55c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/fors.c new file mode 100644 index 00000000..35a50fb3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/fors.h new file mode 100644 index 00000000..a9ebebf9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash.h new file mode 100644 index 00000000..5bd73b34 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash_sha256.c new file mode 100644 index 00000000..b2334d19 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed by calling seed_state(); + sk_seed is not used by the SHA256 instantiation and is ignored. */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state by releasing the incremental SHA-256 context */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N bytes and an address, as the first PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N bytes of SHA-256(key || compressed address) + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N space + * available in front of the pointer, i.e. before the message to use for the + * prefix. 
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, m, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split into + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round up to the next multiple of PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, expressed as a block count */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + 
PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/params.h new file mode 100644 index 00000000..d3063ccc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES 10 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sha256.c new file mode 100644 index 00000000..f801acac --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sha256.h new file mode 100644 index 00000000..fa9bc9ba --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sign.c new file mode 100644 index 00000000..276ef8f7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/thash.h new file mode 100644 index 00000000..d17f8591 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/thash_sha256_simple.c new file mode 100644 
index 00000000..4695df98 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const 
sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/utils.c new file mode 100644 index 00000000..ef0ab916 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/utils.h new file mode 100644 index 00000000..2ae62c52 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/wots.c new 
file mode 100644 index 00000000..0f74f47a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/wots.h new file mode 100644 index 00000000..56be39f1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-128s-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256128SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/META.yml b/crypto_sign/sphincs/sphincs-sha256-192f-robust/META.yml new file mode 100644 index 00000000..7144cdc7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 35664 +testvectors-sha256: ca61e66c0377fd367ab0c920d2190855a64348668a336d300ec7f2c72e721be4 +nistkat-sha256: 9d0898cb264172c31d0fb4901dd56d46728e83e0bf008abccb8b0912c2ebbc52 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..21154fd4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192f-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj hash_sha256x8.obj thash_sha256_robustx8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/address.c new file mode 100644 index 00000000..e67fe546 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/address.h new file mode 100644 index 00000000..cca15aca --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h new file mode 100644 index 00000000..4a6ba6f0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/fors.c new file mode 100644 index 00000000..c828925b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/fors.h new file mode 100644 index 00000000..be1df581 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash.h new file mode 100644 index 00000000..bf26d724 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_sha256.c new file mode 100644 index 00000000..fad52a5f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, m, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, pk, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_sha256x8.c new file mode 100644 index 00000000..57992dc3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_state.h new file mode 100644 index 00000000..5d65df8b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hashx8.h new file mode 100644 index 00000000..064211f4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/params.h new file mode 100644 index 00000000..650a703a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FULL_HEIGHT 66 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES 33 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N / PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256.c new file mode 100644 index 00000000..92ee47ef --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
/*
 * Packs the 8-word hash address into the 22-byte form that is fed to SHA256.
 *
 * Word 1 is skipped entirely (the highest tree address word), and only a
 * single byte is kept of word 0 (layer) and of word 4 (type); every other
 * word contributes four bytes.  Resulting layout, in output order:
 *
 *   addr[0]:1  addr[2]:4  addr[3]:4  addr[4]:1  addr[5]:4  addr[6]:4  addr[7]:4
 *
 * out  - receives 1 + 4 + 4 + 1 + 4 + 4 + 4 = 22 bytes
 * addr - the uncompressed 8 x 32-bit address
 */
void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) {
    /* {source word, number of bytes kept}, in output order. */
    static const unsigned char layout[7][2] = {
        {0, 1}, /* drop 3 bytes of the layer field */
        {2, 4}, /* drop the highest tree address word */
        {3, 4},
        {4, 1}, /* drop 3 bytes of the type field */
        {5, 4},
        {6, 4},
        {7, 4}
    };
    unsigned char *cursor = out;
    unsigned int field;

    for (field = 0; field < 7; field++) {
        PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes(cursor, layout[field][1], addr[layout[field][0]]);
        cursor += layout[field][1];
    }
}
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256.h new file mode 100644 index 00000000..e3ff91de --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256avx.c new file mode 100644 index 00000000..ab2e7329 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
/*
 * Runs one SHA256 compression step on eight lanes at once.
 *
 * ctx  - 8-way state; ctx->s[0..7] are read as the chaining value and are
 *        overwritten with the new chaining value (feed-forward at the end)
 * data - eight consecutive 64-byte message blocks, lane i at data + 64 * i
 *
 * Each vector holds the same word for all eight lanes, so every operation
 * below advances all eight hashes simultaneously.  The rounds are fully
 * unrolled.  Rather than moving the working variables after each round, the
 * arguments handed to SHA256ROUND_AVX are rotated by one position per round;
 * the macro writes its results into its 4th and 8th argument and uses the
 * local temporaries T0 and T1.
 */
void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) {
    u256 s[8], w[64], T0, T1;
    int i;

    // Load words and transform data correctly
    // (lane i: first 32 bytes go to w[i], second 32 bytes to w[i + 8];
    // BYTESWAP reverses the bytes inside every 32-bit word)
    for (i = 0; i < 8; i++) {
        w[i] = BYTESWAP(LOAD(data + 64 * i));
        w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i));
    }

    // After the two transposes w[t] (t < 16) holds message word t of all lanes
    PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_transpose(w);
    PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_transpose(w + 8);

    // Initial State
    s[0] = ctx->s[0];
    s[1] = ctx->s[1];
    s[2] = ctx->s[2];
    s[3] = ctx->s[3];
    s[4] = ctx->s[4];
    s[5] = ctx->s[5];
    s[6] = ctx->s[6];
    s[7] = ctx->s[7];

    // Rounds 0..15 consume the message words directly
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]);
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]);
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]);
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]);
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]);
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]);
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]);
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]);
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]);
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 9, w[9]);
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]);
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]);
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]);
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]);
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]);
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]);
    // Rounds 16..63: extend the schedule just before each round,
    // w[t] = WSIGMA1(w[t-2]) + w[t-16] + w[t-7] + WSIGMA0(w[t-15])
    w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1]));
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]);
    w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2]));
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]);
    w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3]));
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]);
    w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4]));
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]);
    w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5]));
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]);
    w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6]));
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]);
    w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7]));
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]);
    w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8]));
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]);
    w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9]));
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]);
    w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10]));
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]);
    w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11]));
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]);
    w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12]));
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]);
    w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13]));
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]);
    w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14]));
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]);
    w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15]));
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]);
    w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16]));
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]);
    w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17]));
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]);
    w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18]));
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]);
    w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19]));
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]);
    w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20]));
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]);
    w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21]));
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]);
    w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22]));
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]);
    w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23]));
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]);
    w[39] = ADD4_32(WSIGMA1_AVX(w[37]), w[23], w[32], WSIGMA0_AVX(w[24]));
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]);
    w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25]));
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]);
    w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26]));
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]);
    w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27]));
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]);
    w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28]));
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]);
    w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29]));
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]);
    w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30]));
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]);
    w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31]));
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]);
    w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32]));
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]);
    w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33]));
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]);
    w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34]));
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]);
    w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35]));
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]);
    w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36]));
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]);
    w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37]));
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]);
    w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38]));
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]);
    w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39]));
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]);
    w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40]));
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]);
    w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41]));
    SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]);
    w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42]));
    SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]);
    w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43]));
    SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]);
    w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44]));
    SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]);
    w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45]));
    SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]);
    w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46]));
    SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]);
    w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47]));
    SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]);
    w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48]));
    SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]);

    // Feed Forward
    // (add the working variables onto the previous chaining value)
    ctx->s[0] = ADD32(s[0], ctx->s[0]);
    ctx->s[1] = ADD32(s[1], ctx->s[1]);
    ctx->s[2] = ADD32(s[2], ctx->s[2]);
    ctx->s[3] = ADD32(s[3], ctx->s[3]);
    ctx->s[4] = ADD32(s[4], ctx->s[4]);
    ctx->s[5] = ADD32(s[5], ctx->s[5]);
    ctx->s[6] = ADD32(s[6], ctx->s[6]);
    ctx->s[7] = ADD32(s[7], ctx->s[7]);
}
/*
 * State of eight SHA256 computations that run side by side.
 * All lanes absorb the same number of bytes, so a single pair of counters
 * (datalen / msglen) is shared by the eight lanes.
 */
typedef struct SHA256state {
    /* Chaining value: s[k] holds state word k of all eight lanes. */
    u256 s[8];
    /* One 64-byte staging block per lane; lane i occupies [64 * i, 64 * i + 64). */
    uint8_t msgblocks[8 * 64];
    /* Number of bytes currently staged in each lane's block (reset to 0 after a compression). */
    unsigned int datalen;
    /* Number of message bits absorbed so far per lane (grows by 512 per compressed block). */
    uint64_t msglen;
} sha256ctxx8;
/*
 * Pre-absorbs the public seed into all eight lanes of an 8-way SHA256 state.
 *
 * A 64-byte block consisting of the first N bytes of pub_seed followed by
 * zero bytes is fed to every lane of a freshly initialised context, which
 * amounts to exactly one full block per lane.
 *
 * ctx      - 8-way state to initialise; overwritten
 * pub_seed - at least PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N bytes
 */
void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) {
    /* Zero-filled block; only the leading N bytes are replaced by the seed. */
    uint8_t block[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES] = {0};

    memcpy(block, pub_seed, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N);

    PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_init8x(ctx);
    /* The same block goes into each of the eight lanes. */
    PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_update8x(
        ctx,
        block, block, block, block, block, block, block, block,
        PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES);
}
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256x8.h new file mode 100644 index 00000000..05c191d4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sign.c new file mode 100644 index 00000000..6cde92c2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash.h new file mode 100644 index 00000000..52ce115b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash_sha256_robust.c new file mode 100644 index 
00000000..157e8ff5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash_sha256_robust.c @@ -0,0 +1,78 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N bytes. + */ +static void thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + thash(out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + thash(out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash_sha256_robustx8.c 
b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash_sha256_robustx8.c new file mode 100644 index 00000000..032df5d0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thash_sha256_robustx8.c @@ -0,0 +1,156 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + uint8_t *out4, + uint8_t *out5, + uint8_t *out6, + uint8_t *out7, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + const uint8_t *in4, + const uint8_t *in5, + const uint8_t *in6, + const uint8_t *in7, + unsigned int inblocks, + const uint8_t *pub_seed, + uint32_t addrx8[8 * 8], + uint8_t *bufx8, + uint8_t *bitmaskx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. 
*/ + + for (i = 0; i < 8; i++) { + memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + pub_seed, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + + i * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + addrx8 + i * 8); + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + ); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; i++) { + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 0 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 1 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 2 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 3 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 4 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in4[i] ^ bitmaskx8[i + 4 * (inblocks * 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 5 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 6 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 7 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)] = + in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; + } + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_update8x(&ctx, + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + 
PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N), + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out3, 
outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); +} + +#define thash_size_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; \ + uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ + } + +thash_size_variant(1, 1) +thash_size_variant(2, 2) +thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN) +thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_TREES) + +#undef thash_size_variant diff --git 
a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thashx8.h new file mode 100644 index 00000000..3f08c7d3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utils.c new file mode 100644 index 00000000..45cc5764 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const 
unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utils.h new file mode 100644 index 00000000..c2f1b6fb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utilsx8.c new file 
mode 100644 index 00000000..a46d16de --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } +} + +/* The wrappers below ensure we used fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t 
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utilsx8.h new file mode 100644 index 00000000..b039cb5e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/wots.c new file mode 100644 index 00000000..7f416c3b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w) and writes its WOTS_LEN2 base-w digits to csum_base_w. */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 keeps the shift at 0 (rather than 8) when WOTS_LEN2 * WOTS_LOGW is a multiple of 8. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation.
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/wots.h new file mode 100644 index 00000000..dcd92bd2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..fd0c6307 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192f-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
/* Serializes the eight 32-bit address words into 32 bytes, four bytes per
 * word, using ull_to_bytes for each word. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_addr_to_bytes(
    unsigned char *bytes, const uint32_t addr[8]) {
    unsigned char *dst = bytes;
    int word;

    for (word = 0; word < 8; word++) {
        PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(dst, 4, addr[word]);
        dst += 4;
    }
}

/* Stores the layer in address word 0. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_layer_addr(
    uint32_t addr[8], uint32_t layer) {
    addr[0] = layer;
}

/* Stores the 64-bit tree index in words 2 (high half) and 3 (low half);
 * word 1 is cleared. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_addr(
    uint32_t addr[8], uint64_t tree) {
    const uint32_t high = (uint32_t) (tree >> 32);
    const uint32_t low = (uint32_t) tree;

    addr[1] = 0;
    addr[2] = high;
    addr[3] = low;
}

/* Stores the address type in word 4. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type(
    uint32_t addr[8], uint32_t type) {
    addr[4] = type;
}

/* Copies the layer and tree part (words 0..3) of one address into the other;
 * the remaining words of 'out' are left untouched. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_subtree_addr(
    uint32_t out[8], const uint32_t in[8]) {
    int word;

    for (word = 0; word < 4; word++) {
        out[word] = in[word];
    }
}
/* These functions are used for OTS addresses. */

/* Stores the key pair index in address word 5. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr(
    uint32_t addr[8], uint32_t keypair) {
    addr[5] = keypair;
}

/* Copies the layer/tree words (0..3) and the key pair word (5) from 'in' to
 * 'out'. Words 4, 6 and 7 of 'out' are left untouched. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr(
    uint32_t out[8], const uint32_t in[8]) {
    int word;

    for (word = 0; word < 4; word++) {
        out[word] = in[word];
    }
    out[5] = in[5];
}

/* Stores the chain index in address word 6. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_chain_addr(
    uint32_t addr[8], uint32_t chain) {
    addr[6] = chain;
}

/* Stores the hash index in address word 7. */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_hash_addr(
    uint32_t addr[8], uint32_t hash) {
    addr[7] = hash;
}

/* These functions are used for all hash tree addresses (including FORS). */

/* Stores the tree height in address word 6 (the same word that
 * set_chain_addr writes). */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height(
    uint32_t addr[8], uint32_t tree_height) {
    addr[6] = tree_height;
}

/* Stores the tree index in address word 7 (the same word that
 * set_hash_addr writes). */
void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index(
    uint32_t addr[8], uint32_t tree_index) {
    addr[7] = tree_index;
}
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/api.h new file mode 100644 index 00000000..1d8a0885 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/fors.c new file mode 100644 index 00000000..a6bea27c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/fors.h new file mode 100644 index 00000000..b3047774 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash.h new file mode 100644 index 00000000..282883ea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash_sha256.c new file mode 100644 index 00000000..490b60b9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* For SHA256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N space + * available in front of the pointer, i.e. before the message to use for the + * prefix. 
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, m, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/params.h new file mode 100644 index 00000000..394b3550 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FULL_HEIGHT 66 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES 33 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N / PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sha256.c new file mode 100644 index 00000000..c5d71b51 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sha256.h new file mode 100644 index 00000000..ee97619d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sign.c new file mode 100644 index 00000000..1e75cb07 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/thash.h new file mode 100644 index 00000000..8e9aa2ee --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/thash_sha256_robust.c new file mode 100644 
index 00000000..8a289d07 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/thash_sha256_robust.c @@ -0,0 +1,82 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the 
stack */ +/* Fixed-size wrapper: calls thash with a count of 1 and a stack buffer of N + SHA256_ADDR_BYTES + 4 + 1 * N bytes. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} +/* Fixed-size wrapper: calls thash with a count of 2 and a stack buffer of N + SHA256_ADDR_BYTES + 4 + 2 * N bytes. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} +/* Fixed-size wrapper: calls thash with a count of WOTS_LEN and a stack buffer of N + SHA256_ADDR_BYTES + 4 + WOTS_LEN * N bytes. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} +/* Fixed-size wrapper: calls thash with a count of FORS_TREES and a stack buffer of N + SHA256_ADDR_BYTES + 4 + FORS_TREES * N bytes. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash( + out, buf, in, 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/utils.c new file mode 100644 index 00000000..aa12f30d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + * Only the low 8 * outlen bits of 'in' are written; if outlen exceeds + * sizeof(unsigned long long) the extra leading bytes are written as zero. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + * inlen is expected to be at most sizeof(unsigned long long): for larger + * values the shift count 8 * (inlen - 1 - i) reaches the width of the type, + * which is undefined behaviour. Returns 0 when inlen is 0. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + * Overwrites the tree height and tree index of addr while walking up the tree + * and reads tree_height nodes of N bytes each from auth_path. + * tree_height must be at least 1: the loop bound tree_height - 1 is unsigned + * and would wrap around for 0. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { +/* stack and heights are caller-provided scratch space; the wrappers below size them for tree_height + 1 nodes / entries. The leaf count is computed as 1 << tree_height in int arithmetic, so tree_height must be less than 31 for a 32-bit int. */ + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ +/* treehash with tree_height fixed to FORS_HEIGHT; allocates FORS_HEIGHT + 1 stack nodes and height entries. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} +/* treehash with tree_height fixed to TREE_HEIGHT; allocates TREE_HEIGHT + 1 stack nodes and height entries. */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/utils.h new file mode 100644 index 00000000..22b428a6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/wots.c new 
file mode 100644 index 00000000..8cf20b7d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + * Sets the hash address field of wots_addr to 0 before calling prf_addr. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + * + * Applies at most 'steps' hash calls and stops early once the chain position + * reaches WOTS_W. Overwrites the hash address field of addr with each + * position it hashes. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; +/* Emit out_len digits, most significant bits of each input byte first; 'total' is refilled with the next input byte whenever all of its bits have been used. */ + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w) and writes it to csum_base_w as WOTS_LEN2 base-w digits. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. NOTE(review): when WOTS_LEN2 * WOTS_LOGW is a multiple of 8 the shift count below is 8, not 0; confirm that no supported parameter set reaches that case. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. lengths must have room for WOTS_LEN1 message digits followed by WOTS_LEN2 checksum digits. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; +/* Each of the WOTS_LEN public key elements is the secret chain start advanced by WOTS_W - 1 steps; the chain address field of addr is overwritten for every element. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + * 'sig' receives WOTS_LEN elements of N bytes each; the chain address field of + * addr is overwritten for every element. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); +/* Each signature element is the secret chain start advanced by lengths[i] steps. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); +/* Continue each chain from position lengths[i] for the remaining WOTS_W - 1 - lengths[i] steps; the chain address field of addr is overwritten for every element. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/wots.h new file mode 100644 index 00000000..1d26d816 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/META.yml b/crypto_sign/sphincs/sphincs-sha256-192f-simple/META.yml new file mode 100644 index 00000000..8a54aa61 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 35664 +testvectors-sha256: b25e0f2560f500d8988809522c72ea3ab0f81be52476a6cdf9d05a890a2d2ce0 +nistkat-sha256: 306fef951d07b17b27c67ffe9e63185ae5d5fde87619b76872a3ca969299d47c +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..80415f2a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192f-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj hash_sha256x8.obj thash_sha256_simplex8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/address.c new file mode 100644 index 00000000..bd2e11f3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" +/* Writes each of the 8 address words as 4 bytes via ull_to_bytes, 32 bytes in total. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} +/* Stores layer in address word 0. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} +/* Stores the 64-bit tree value in words 2 (high half) and 3 (low half) and clears word 1. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} +/* Stores type in address word 4. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} +/* Copies words 0-3 (layer and tree) from in to out; the other words of out are left untouched. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for WOTS and FORS addresses. 
*/ +/* Stores keypair in address word 5. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} +/* Copies words 0-3 and word 5 (keypair) from in to out; words 4, 6 and 7 of out are left untouched. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} +/* Stores chain in address word 6. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} +/* Stores hash in address word 7. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). They write words 6 and 7, the same words that set_chain_addr and set_hash_addr write. */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/address.h new file mode 100644 index 00000000..5187d3aa --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h new file mode 100644 index 00000000..add60c93 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/fors.c new file mode 100644 index 00000000..3ab2621c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/fors.h new file mode 100644 index 00000000..a66780c8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash.h new file mode 100644 index 00000000..fe95601b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_sha256.c new file mode 100644 index 00000000..88e9f529 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, m, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, pk, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_sha256x8.c new file mode 100644 index 00000000..9f55df83 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_state.h new file mode 100644 index 00000000..d2e112a5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hashx8.h new file mode 100644 index 00000000..3ec3f024 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/params.h new file mode 100644 index 00000000..602cc68f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FULL_HEIGHT 66 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES 33 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256.c new file mode 100644 index 00000000..648e5d0e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256.h new file mode 100644 index 00000000..1540ae97 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256avx.c new file mode 100644 index 00000000..5a0e5c59 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256avx.h new file mode 100644 index 00000000..682d557c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256x8.c new file mode 100644 index 00000000..24d32834 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256x8.h new file mode 100644 index 00000000..ac40133b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sign.c new file mode 100644 index 00000000..c8c94395 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash.h new file mode 100644 index 00000000..841382bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash_sha256_simple.c new file mode 100644 index 
00000000..57f57ead --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state 
*hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash_sha256_simplex8.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash_sha256_simplex8.c new file mode 100644 index 00000000..ee301b28 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thash_sha256_simplex8.c @@ -0,0 +1,129 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned 
char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + uint8_t *bufx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + addrx8 + i * 8); + } + + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 0 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 1 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 2 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 3 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 4 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 5 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 6 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 7 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_update8x(&ctx, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * 
PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N), + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); +} + +#define thashx8_variant_impl(name, size) \ + void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char 
*out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, state_seeded); \ + } + +thashx8_variant_impl(1, 1) +thashx8_variant_impl(2, 2) +thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN) +thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_TREES) + +#undef thashx8_variant_impl diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thashx8.h new file mode 100644 index 00000000..17d1ecc0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const 
unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utils.c new file mode 100644 index 00000000..91c17256 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + 
unsigned int heights[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utils.h new file mode 100644 index 00000000..825bcca7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utilsx8.c new file mode 100644 index 00000000..2d133369 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utilsx8.h new file mode 100644 index 00000000..e6df8cdc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/wots.c new file mode 100644 index 00000000..463ce74b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/wots.h new file mode 100644 index 00000000..1adea3c6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..f0670be0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192f-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/address.c new file mode 100644 index 00000000..d268f11f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/address.h new file mode 100644 index 00000000..c4489f99 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/api.h new file mode 100644 index 00000000..03c7f73f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/fors.c new file mode 100644 index 00000000..78d4a7ec --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/fors.h new file mode 100644 index 00000000..17ae6135 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash.h new file mode 100644 index 00000000..1f14cad7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash_sha256.c new file mode 100644 index 00000000..5a5e5c3a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed via seed_state(). For SHA256 the + secret seed is not needed here, so sk_seed is ignored. */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * The prefix and the leading message bytes are staged in the local buffer + * buf, so m is only read and no spare room is needed in front of the + * pointer.
At most one block of message bytes is copied; any remainder is hashed in place (no memory + * is allocated for it). + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, m, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; i++) {
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + 
PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/params.h new file mode 100644 index 00000000..86ea9753 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FULL_HEIGHT 66 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES 33 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sha256.c new file mode 100644 index 00000000..0c36b705 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sha256.h new file mode 100644 index 00000000..a1166df6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sign.c new file mode 100644 index 00000000..00ab08f0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/thash.h new file mode 100644 index 00000000..353f7f92 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/thash_sha256_simple.c new file mode 100644 
index 00000000..445aab66 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const 
sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/utils.c new file mode 100644 index 00000000..34c17025 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < ((uint32_t)1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together.
*/ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/utils.h new file mode 100644 index 00000000..e0b9926c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/wots.c new 
file mode 100644 index 00000000..ff04afab --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 keeps the shift at 0 (not 8) when the checksum fills whole bytes. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the secret seed sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/wots.h new file mode 100644 index 00000000..8497fe62 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192f-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/META.yml b/crypto_sign/sphincs/sphincs-sha256-192s-robust/META.yml new file mode 100644 index 00000000..259c864d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 17064 +testvectors-sha256: 1be5c30de6d0b856b1b51f0ff50a2acf9c3a359ee2178004e153bdfc50a68832 +nistkat-sha256: 23374b2ece45c8ec7272473d70eb424894324702616b8456343dbd79f109b675 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..13e3a3eb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192s-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj hash_sha256x8.obj thash_sha256_robustx8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/address.c new file mode 100644 index 00000000..1de7cc0d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/address.h new file mode 100644 index 00000000..5a005db1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h new file mode 100644 index 00000000..a49c3ad1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/fors.c new file mode 100644 index 00000000..75fa3457 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/fors.h new file mode 100644 index 00000000..a6b35a3d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash.h new file mode 100644 index 00000000..85fa0c57 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_sha256.c new file mode 100644 index 00000000..aef4714c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, m, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, pk, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_sha256x8.c new file mode 100644 index 00000000..6132282b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_state.h new file mode 100644 index 00000000..40ed5e54 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hashx8.h new file mode 100644 index 00000000..0cc8380c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/params.h new file mode 100644 index 00000000..c657073f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES 14 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N / PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256.c new file mode 100644 index 00000000..e108296f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256.h new file mode 100644 index 00000000..a22d3d94 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256avx.c new file mode 100644 index 00000000..dd33b6fb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256avx.h new file mode 100644 index 00000000..8560336c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256x8.c new file mode 100644 index 00000000..45cebb47 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256x8.h new file mode 100644 index 00000000..4d52cfa3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sign.c new file mode 100644 index 00000000..9ed7a228 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash.h new file mode 100644 index 00000000..0a8a7ea7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash_sha256_robust.c new file mode 100644 index 
00000000..5c56e524 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash_sha256_robust.c @@ -0,0 +1,78 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N bytes. + */ +static void thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + thash(out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + thash(out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash_sha256_robustx8.c 
b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash_sha256_robustx8.c new file mode 100644 index 00000000..41264eea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thash_sha256_robustx8.c @@ -0,0 +1,156 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + uint8_t *out4, + uint8_t *out5, + uint8_t *out6, + uint8_t *out7, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + const uint8_t *in4, + const uint8_t *in5, + const uint8_t *in6, + const uint8_t *in7, + unsigned int inblocks, + const uint8_t *pub_seed, + uint32_t addrx8[8 * 8], + uint8_t *bufx8, + uint8_t *bitmaskx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. 
*/ + + for (i = 0; i < 8; i++) { + memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + pub_seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + + i * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + addrx8 + i * 8); + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + ); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i++) { + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in4[i] ^ bitmaskx8[i + 4 * (inblocks * 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = + in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; + } + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(&ctx, + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + 
PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out3, 
outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); +} + +#define thash_size_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; \ + uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ + } + +thash_size_variant(1, 1) +thash_size_variant(2, 2) +thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN) +thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES) + +#undef thash_size_variant diff --git 
a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thashx8.h new file mode 100644 index 00000000..fc1817af --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utils.c new file mode 100644 index 00000000..a6309428 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const 
unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utils.h new file mode 100644 index 00000000..4cc7cba3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utilsx8.c new file 
mode 100644 index 00000000..05c39b7d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t 
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utilsx8.h new file mode 100644 index 00000000..5ce6d630 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/wots.c new file mode 100644 index 00000000..61f5d54a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/wots.h new file mode 100644 index 00000000..cbd26fea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..23f26b7d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192s-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/address.c new file mode 100644 index 00000000..0da3ca4b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/address.h new file mode 100644 index 00000000..f354877c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/api.h new file mode 100644 index 00000000..cbcbd636 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/fors.c new file mode 100644 index 00000000..8ff15d59 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/fors.h new file mode 100644 index 00000000..b4036312 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash.h new file mode 100644 index 00000000..379c3362 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash_sha256.c new file mode 100644 index 00000000..88f7c47e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed (see seed_state) so the state can be + reused by later hash calls; sk_seed is unused for SHA-256. */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * m itself is only read and needs no spare room in front of it: optrand and + * at most the first block's worth of m are copied into a local buffer as the + * prefix.
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, m, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Number of blocks covering R + pk, i.e. rounded up to a multiple of PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N +
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/params.h new file mode 100644 index 00000000..1d4451ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES 14 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N / PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sha256.c new file mode 100644 index 00000000..ca42a31e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sha256.h new file mode 100644 index 00000000..011c2dca --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sign.c new file mode 100644 index 00000000..7216f9c6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/thash.h new file mode 100644 index 00000000..1b6f5f82 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/thash_sha256_robust.c new file mode 100644 
index 00000000..996c7b32 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/thash_sha256_robust.c @@ -0,0 +1,82 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the 
stack */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash( + out, buf, in, 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/utils.c new file mode 100644 index 00000000..eb0125e8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/utils.h new file mode 100644 index 00000000..4c4868d6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/wots.c new 
file mode 100644 index 00000000..097716d4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/wots.h new file mode 100644 index 00000000..7178ae1b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/META.yml b/crypto_sign/sphincs/sphincs-sha256-192s-simple/META.yml new file mode 100644 index 00000000..3f84eef1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 17064 +testvectors-sha256: ee413e410a29274a9647b9440d6a554670e0f9587efaaddedf82e4923f68f80e +nistkat-sha256: 02b192ff93bc8977a80e9efc8fa6814ae85c2ad939f7185a959b428c1eb77150 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..c1fdc0fb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192s-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj hash_sha256x8.obj thash_sha256_simplex8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/address.c new file mode 100644 index 00000000..9731a754 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/address.h new file mode 100644 index 00000000..2963af31 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h new file mode 100644 index 00000000..06c958e0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/fors.c new file mode 100644 index 00000000..1afc681b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/fors.h new file mode 100644 index 00000000..833e7983 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash.h new file mode 100644 index 00000000..dd1da382 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_sha256.c new file mode 100644 index 00000000..40301ba1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, m, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, pk, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_sha256x8.c new file mode 100644 index 00000000..835ff826 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_state.h new file mode 100644 index 00000000..aebebdb8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hashx8.h new file mode 100644 index 00000000..d079b5c3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/params.h new file mode 100644 index 00000000..7aae9618 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES 14 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256.c new file mode 100644 index 00000000..96e520e5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
/*
 * Packs the 8-word address 'addr' into a 22-byte buffer at 'out'.
 *
 * Only some of the address words are emitted, and two of them are truncated
 * to a single byte, so the result is 1 + 4 + 4 + 1 + 4 + 4 + 4 = 22 bytes.
 * The shorter encoding keeps the final SHA256 input block small enough that
 * fewer compression calls are needed (the last block always carries at
 * least 65 bits of padding).
 */
void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) {
    /* Which address word goes out, and how many bytes of it are kept. */
    static const struct {
        unsigned int word;
        unsigned int width;
    } layout[] = {
        {0, 1}, /* drop 3 bytes of the layer field */
        {2, 4}, /* drop the highest tree address word (addr[1]) */
        {3, 4},
        {4, 1}, /* drop 3 bytes of the type field */
        {5, 4},
        {6, 4},
        {7, 4},
    };
    size_t offset = 0;
    size_t k;

    for (k = 0; k < sizeof layout / sizeof layout[0]; k++) {
        PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes(out + offset, layout[k].width, addr[layout[k].word]);
        offset += layout[k].width;
    }
}
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256.h new file mode 100644 index 00000000..273f9428 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256avx.c new file mode 100644 index 00000000..90429644 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256avx.h new file mode 100644 index 00000000..608b38ae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
/*
 * 8-way counterpart of the seeded single-lane state: initialises 'ctx' and
 * absorbs one full SHA256 block, consisting of the first N bytes of
 * 'pub_seed' followed by zero padding, into all eight lanes.
 */
void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) {
    /* Zero-filled block; only the leading N bytes are overwritten below. */
    uint8_t padded_seed[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES] = {0};

    memcpy(padded_seed, pub_seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N);

    PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_init8x(ctx);
    /* Every lane receives the same padded seed block. */
    PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_update8x(ctx,
            padded_seed, padded_seed, padded_seed, padded_seed,
            padded_seed, padded_seed, padded_seed, padded_seed,
            PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES);
}
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256x8.h new file mode 100644 index 00000000..a6a1d867 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sign.c new file mode 100644 index 00000000..c3702bf6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash.h new file mode 100644 index 00000000..25572bb9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash_sha256_simple.c new file mode 100644 index 
00000000..4f6c8c01 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state 
*hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash_sha256_simplex8.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash_sha256_simplex8.c new file mode 100644 index 00000000..1a1835d5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thash_sha256_simplex8.c @@ -0,0 +1,129 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned 
char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + uint8_t *bufx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + addrx8 + i * 8); + } + + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 0 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 1 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 2 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 3 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 4 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 5 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 6 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 7 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_update8x(&ctx, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * 
PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N), + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); +} + +#define thashx8_variant_impl(name, size) \ + void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char 
*out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, state_seeded); \ + } + +thashx8_variant_impl(1, 1) +thashx8_variant_impl(2, 2) +thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN) +thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_TREES) + +#undef thashx8_variant_impl diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thashx8.h new file mode 100644 index 00000000..a2dfebf4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const 
unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utils.c new file mode 100644 index 00000000..d9bee51e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + 
unsigned int heights[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utils.h new file mode 100644 index 00000000..e7eaa762 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utilsx8.c new file mode 100644 index 00000000..7ecdb4f5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utilsx8.h new file mode 100644 index 00000000..a33a8578 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/wots.c new file mode 100644 index 00000000..706919f5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N); + } + } + } +} + +/** + * Takes a n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/wots.h new file mode 100644 index 00000000..5edda14f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..6221a3d8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-192s-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/address.c new file mode 100644 index 00000000..6167103d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { /* each of the 8 address words becomes 4 output bytes via ull_to_bytes, 32 bytes in total */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; /* word 0 holds the layer */ +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; /* word 1 is cleared; the 64-bit tree value occupies word 2 (high half) and word 3 (low half) */ + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; /* word 4 holds the address type */ +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { /* copies words 0-3, i.e. the words written by set_layer_addr and set_tree_addr; words 4-7 of out are left untouched */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses.
*/ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; /* word 5 holds the key pair index */ +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { /* copies words 0-3 and the keypair word 5; words 4, 6 and 7 of out are left untouched */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; /* word 6 is also the word written by set_tree_height */ +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; /* word 7 is also the word written by set_tree_index */ +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; /* shares word 6 with set_chain_addr */ +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; /* shares word 7 with set_hash_addr */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/address.h new file mode 100644 index 00000000..9f5ba413 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void
PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/api.h new file mode 100644 index 00000000..a55e5c04 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/fors.c new file mode 100644 index 00000000..6a073d12 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { /* thin wrapper: sk = prf_addr(sk_seed, fors_leaf_addr) */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { /* thin wrapper: leaf = thash_1(sk) under pub_seed and fors_leaf_addr */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); /* leaf temporarily holds the secret value ... */ + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); /* ... which is then replaced in place by its hash */ +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; /* running bit position in m, shared by all trees */ + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; /* bits are read least-significant first within each byte; the j-th bit read becomes bit j of the index */ + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 <<
PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT); /* tree i owns the 2^FORS_HEIGHT consecutive leaf indices starting at idx_offset */ + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT; /* per tree the signature holds N bytes of secret value followed by FORS_HEIGHT entries of N bytes */ + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT); /* same leaf numbering as in fors_sign: tree i starts at idx_offset */ + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree.
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT; /* step over the FORS_HEIGHT entries of N bytes just consumed for this tree */ + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/fors.h new file mode 100644 index 00000000..a0defabe --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash.h new file mode 100644 index 00000000..54bac4cf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash_sha256.c new file mode 100644 index 00000000..8b7aeee2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* Seeds hash_state_seeded from pub_seed via seed_state. The sk_seed argument is + not used by this SHA256 instantiation. */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state by handing it to sha256_inc_ctx_release */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); /* buf = key followed by the compressed address */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES); /* one-shot hash; only the first N output bytes are kept */ + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * The message is read-only and no space in front of m is required: optrand and + * the leading message bytes are staged in a local buffer to complete the first + * block,
and the remainder of m is hashed directly from the buffer passed in (so + * the message is never copied as a whole). + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; i++) { /* inner pad block: the N key bytes XOR 0x36, the rest of the block plain 0x36 */ + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); /* buf is reused: optrand now leads the data hashed after the pad block */ + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); /* the inner digest lands right behind the first block of buf */ + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, m, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); /* the inner digest lands right behind the first block of buf */ + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; i++) { /* outer pad block, built in front of the inner digest already stored in buf */
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); /* outer hash over pad block followed by inner digest; the result overwrites the start of buf */ + memcpy(R, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); /* R is the first N bytes of the HMAC output */ + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message: the SHA256 digest of R, pk and m (in that order) seeds MGF1. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Number of SHA256 blocks covering R and pk: their combined size rounded up to a multiple of PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, divided by the block size */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N +
PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES); + + /* If R, pk and the message together do not fill all INBLOCKS blocks of inbuf */ + if (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill inbuf completely and absorb it, so that finalize only uses the rest of the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_INBLOCKS *
PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_MSG_BYTES); /* MGF1 output is consumed in order: FORS message bytes, tree bytes, leaf bytes */ + bufp += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BITS); /* keep only the low TREE_BITS bits */ + bufp += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_LEAF_BITS); /* keep only the low LEAF_BITS bits */ + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation.
#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PARAMS_H
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PARAMS_H

/* Compile-time parameter set for this SPHINCS+ instance. All derived sizes
   below are computed from the six base parameters that follow. */

/* Hash output length in bytes. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N 24
/* Height of the hypertree. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FULL_HEIGHT 64
/* Number of subtree layers. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D 8
/* FORS tree dimensions. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT 16
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES 14
/* Winternitz parameter. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_W 16

/* The hash function is defined by linking a different hash.c file, as opposed
   to setting a #define constant. */

/* Size in bytes of an uncompressed address (eight 32-bit words). */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_BYTES 32

/* WOTS parameters. */
/* log2 of the Winternitz parameter (WOTS_W = 16). */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LOGW 4

/* Number of base-w digits in an N-byte value: 8 * 24 / 4 = 48. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LOGW)

/* PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute.
   With len_1 = 48 and w = 16: floor(log(720) / log(16)) + 1 = 3. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN2 3

/* Total number of WOTS chains (48 + 3 = 51) and the resulting sizes in bytes
   (51 * 24 = 1224). */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN2)
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N)
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES

/* Subtree size: height of each of the D subtrees (64 / 8 = 8). */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D)

/* FORS parameters. */
/* Bytes of message digest consumed by FORS: ceil(16 * 14 / 8) = 28. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES + 7) / 8)
/* FORS signature size in bytes: (16 + 1) * 14 * 24 = 5712. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N)
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N

/* Resulting SPX sizes. */
/* Signature: N + FORS_BYTES + D * WOTS_BYTES + FULL_HEIGHT * N
   = 24 + 5712 + 9792 + 1536 = 17064 bytes. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES +\
    PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N)
/* Public key: 2 * N = 48 bytes; secret key: 2 * N + PK_BYTES = 96 bytes. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N)
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_PK_BYTES)

/* Optionally, signing can be made non-deterministic using optrand.
   This can help counter side-channel attacks that would benefit from
   getting a large number of traces when the signer uses the same nodes. */
/* NOTE(review): the signing code in sign.c draws N (24) bytes of optrand and
   does not reference this constant - confirm whether it is still needed. */
#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_OPTRAND_BYTES 32

#endif
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/sha256.h new file mode 100644 index 00000000..c0f8f8f6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/sign.c new file mode 100644 index 00000000..abf5aba2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/thash.h new file mode 100644 index 00000000..ba826230 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/thash_sha256_simple.c new file mode 100644 
index 00000000..7a59f4e8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const 
sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/utils.c new file mode 100644 index 00000000..1a29a6d6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. 
/**
 * Writes the value of 'in' to 'out' as 'outlen' bytes in big-endian byte
 * order. If outlen is smaller than the value needs, only the low-order
 * outlen bytes are written; if it is larger, the leading bytes are zero.
 */
void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(
    unsigned char *out, size_t outlen, unsigned long long in) {
    size_t pos = outlen;

    /* Fill from the last byte backwards, least significant byte first. */
    while (pos > 0) {
        pos--;
        out[pos] = (unsigned char)(in & 0xff);
        in >>= 8;
    }
}

/**
 * Interprets the 'inlen' bytes at 'in' as a big-endian integer and returns
 * it. Returns 0 for inlen == 0. If inlen exceeds the width of
 * unsigned long long, the leading bytes are shifted out and the low-order
 * bytes of the value are returned.
 */
unsigned long long PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_bytes_to_ull(
    const unsigned char *in, size_t inlen) {
    unsigned long long value = 0;
    size_t i;

    /* Accumulate with a constant 8-bit shift. Positioning each byte with a
       shift of 8 * (inlen - 1 - i) would be undefined behaviour as soon as
       that count reaches the width of the type. */
    for (i = 0; i < inlen; i++) {
        value = (value << 8) | in[i];
    }
    return value;
}
*/ + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ *
+ * 'stack' and 'heights' are scratch buffers supplied by the caller; the
+ * wrappers below size both for tree_height + 1 entries.
+ * tree_height must be smaller than 32, the width of the leaf counter.
+ */
+static void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash(
+    unsigned char *root, unsigned char *auth_path,
+    unsigned char *stack, unsigned int *heights,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,
+    void (*gen_leaf)(
+        unsigned char * /* leaf */,
+        const unsigned char * /* sk_seed */,
+        const unsigned char * /* pub_seed */,
+        uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */,
+        const hash_state * /* hash_state_seeded */),
+    uint32_t tree_addr[8],
+    const hash_state *hash_state_seeded) {
+
+    unsigned int offset = 0;
+    uint32_t idx;
+    uint32_t tree_idx;
+
+    /* The leaf count is formed in uint32_t: shifting the int constant 1
+       would be undefined for tree_height == 31. */
+    for (idx = 0; idx < ((uint32_t)1 << tree_height); idx++) {
+        /* Add the next leaf node to the stack. */
+        gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N,
+                 sk_seed, pub_seed, idx + idx_offset, tree_addr,
+                 hash_state_seeded);
+        offset++;
+        heights[offset - 1] = 0;
+
+        /* If this is a node we need for the auth path.. */
+        if ((leaf_idx ^ 0x1) == idx) {
+            memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N);
+        }
+
+        /* While the top-most nodes are of equal height.. */
+        while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) {
+            /* Compute index of the new node, in the next layer. */
+            tree_idx = (idx >> (heights[offset - 1] + 1));
+
+            /* Set the address of the node we're creating. */
+            PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_height(
+                tree_addr, heights[offset - 1] + 1);
+            PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_tree_index(
+                tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1)));
+            /* Hash the top-most nodes from the stack together. The result
+               overwrites the lower of the two stack slots. */
+            PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_2(
+                stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N,
+                pub_seed, tree_addr, hash_state_seeded);
+            offset--;
+            /* Note that the top-most node is now one layer higher. */
+            heights[offset - 1]++;
+
+            /* If this is a node we need for the auth path.. */
+            if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) {
+                memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N,
+                       stack + (offset - 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N);
+            }
+        }
+    }
+    /* After the last leaf, the only node left on the stack is the root. */
+    memcpy(root, stack, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N);
+}
+
+/* The wrappers below ensure that we use fixed-size buffers on the stack */
+
+void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_FORS_HEIGHT(
+    unsigned char *root, unsigned char *auth_path,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t leaf_idx, uint32_t idx_offset,
+    void (*gen_leaf)(
+        unsigned char * /* leaf */,
+        const unsigned char * /* sk_seed */,
+        const unsigned char * /* pub_seed */,
+        uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */,
+        const hash_state * /* hash_state_seeded */),
+    uint32_t tree_addr[8], const hash_state *hash_state_seeded) {
+
+    unsigned char stack[(PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N];
+    unsigned int heights[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT + 1];
+
+    PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash(
+        root, auth_path, stack, heights, sk_seed, pub_seed,
+        leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded);
+}
+
+void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT(
+    unsigned char *root, unsigned char *auth_path,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t leaf_idx, uint32_t idx_offset,
+    void (*gen_leaf)(
+        unsigned char * /*
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/utils.h new file mode 100644 index 00000000..97f890fd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/wots.c new 
file mode 100644
index 00000000..354ff91f
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/wots.c
@@ -0,0 +1,167 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "address.h"
+#include "hash.h"
+#include "hash_state.h"
+#include "params.h"
+#include "thash.h"
+#include "utils.h"
+#include "wots.h"
+
+// TODO clarify address expectations, and make them more uniform.
+// TODO i.e. do we expect types to be set already?
+// TODO and do we expect modifications or copies?
+
+/**
+ * Computes the starting value for a chain, i.e. the secret key.
+ * Expects the address to be complete up to the chain address.
+ *
+ * sk:        output buffer for the chain's start value.
+ * wots_addr: modified; its hash address is reset to 0 before the PRF call.
+ */
+static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed,
+                        uint32_t wots_addr[8],
+                        const hash_state *hash_state_seeded) {
+    /* Make sure that the hash address is actually zeroed. */
+    PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_hash_addr(wots_addr, 0);
+
+    /* Generate sk element. */
+    PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded);
+}
+
+/**
+ * Computes the chaining function.
+ * out and in have to be n-byte arrays; they may be the same array.
+ *
+ * Interprets in as start-th value of the chain.
+ * addr has to contain the address of the chain.
+ *
+ * At most 'steps' hash calls are made; the chain never advances past
+ * position WOTS_W - 1. The hash address of addr is overwritten at each step.
+ */
+static void gen_chain(unsigned char *out, const unsigned char *in,
+                      unsigned int start, unsigned int steps,
+                      const unsigned char *pub_seed, uint32_t addr[8],
+                      const hash_state *hash_state_seeded) {
+    uint32_t i;
+
+    /* Initialize out with the value at position 'start'. The key generation
+       and signing routines in this file pass out == in, which memcpy does
+       not permit, so memmove is used. */
+    memmove(out, in, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N);
+
+    /* Iterate 'steps' calls to the hash function. */
+    for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_W; i++) {
+        PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_hash_addr(addr, i);
+        PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_thash_1(
+            out, out, pub_seed, addr, hash_state_seeded);
+    }
+}
+
+/**
+ * base_w algorithm as described in draft.
+ * Interprets an array of bytes as integers in base w.
+ * This only works when log_w is a divisor of 8.
+ *
+ * output:  receives out_len digits, each in the range 0 .. WOTS_W - 1.
+ * out_len: number of digits to produce.
+ * input:   bytes to split, consumed most significant bits first.
+ */
+static void base_w(unsigned int *output, const size_t out_len,
+                   const unsigned char *input) {
+    size_t in = 0;
+    size_t out = 0;
+    unsigned char total = 0;
+    unsigned int bits = 0;
+    size_t consumed;
+
+    for (consumed = 0; consumed < out_len; consumed++) {
+        /* Fetch the next input byte once the current one is used up. */
+        if (bits == 0) {
+            total = input[in];
+            in++;
+            bits += 8;
+        }
+        bits -= PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LOGW;
+        output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_W - 1));
+        out++;
+    }
+}
+
+/*
+ * Computes the WOTS+ checksum over a message (in base_w).
+ *
+ * csum_base_w: receives WOTS_LEN2 base-w digits of the checksum.
+ * msg_base_w:  the WOTS_LEN1 base-w digits of the message.
+ */
+static void wots_checksum(unsigned int *csum_base_w,
+                          const unsigned int *msg_base_w) {
+    unsigned int csum = 0;
+    unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LOGW + 7) / 8];
+    unsigned int i;
+
+    /* Compute checksum. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN1; i++) {
+        csum += PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i];
+    }
+
+    /* Convert checksum to base_w. */
+    /* Make sure expected empty zero bits are the least significant bits.
+       The outer "% 8" keeps the shift at 0 (instead of 8) when the checksum
+       already fills a whole number of bytes. */
+    csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LOGW) % 8)) % 8);
+    PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_ull_to_bytes(
+        csum_bytes, sizeof(csum_bytes), csum);
+    base_w(csum_base_w, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN2, csum_bytes);
+}
+
+/*
+ * Takes a message and derives the matching chain lengths.
+ * 'lengths' receives WOTS_LEN1 message digits followed by the checksum
+ * digits written by wots_checksum.
+ */
+static void chain_lengths(unsigned int *lengths, const unsigned char *msg) {
+    base_w(lengths, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN1, msg);
+    wots_checksum(lengths + PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN1, lengths);
+}
+
+/**
+ * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key
+ * elements and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys)
+ * and the address of this WOTS key pair.
+ *
+ * Writes the computed public key to 'pk'.
+ */
+void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_gen_pk(
+    unsigned char *pk, const unsigned char *sk_seed,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded) {
+    /* One chain per public key element: derive the chain's secret start
+       value in place, then walk it over the full chain length. */
+    for (uint32_t chain = 0; chain < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN; chain++) {
+        unsigned char *node = pk + chain * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N;
+
+        PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_chain_addr(addr, chain);
+        wots_gen_sk(node, sk_seed, addr, hash_state_seeded);
+        gen_chain(node, node, 0, PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_W - 1,
+                  pub_seed, addr, hash_state_seeded);
+    }
+}
+
+/**
+ * Takes an n-byte message and the secret seed sk_seed to compute a
+ * signature 'sig'.
+ */
+void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_sign(
+    unsigned char *sig, const unsigned char *msg,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t addr[8], const hash_state *hash_state_seeded) {
+    unsigned int steps[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN];
+
+    /* Message digits plus checksum digits give the number of hash steps
+       to apply on each chain. */
+    chain_lengths(steps, msg);
+
+    for (uint32_t chain = 0; chain < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN; chain++) {
+        unsigned char *node = sig + chain * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N;
+
+        PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_chain_addr(addr, chain);
+        wots_gen_sk(node, sk_seed, addr, hash_state_seeded);
+        gen_chain(node, node, 0, steps[chain],
+                  pub_seed, addr, hash_state_seeded);
+    }
+}
+
+/**
+ * Takes a WOTS signature and an n-byte message, computes a WOTS public key.
+ *
+ * Writes the computed public key to 'pk'.
+ */
+void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_pk_from_sig(
+    unsigned char *pk,
+    const unsigned char *sig, const unsigned char *msg,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded) {
+    unsigned int lengths[PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN];
+    uint32_t i;
+
+    /* Recover the chain position each signature element sits at. */
+    chain_lengths(lengths, msg);
+
+    /* Continue every chain from that position for the remaining
+       WOTS_W - 1 - lengths[i] steps. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_set_chain_addr(addr, i);
+        gen_chain(pk + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_N,
+                  lengths[i], PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr,
+                  hash_state_seeded);
+    }
+}
diff --git a/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/wots.h
new file mode 100644
index 00000000..1d966fd4
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-192s-simple/clean/wots.h
@@ -0,0 +1,41 @@
+#ifndef PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_H
+#define PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_WOTS_H
+
+#include "hash_state.h"
+#include "params.h"
+#include <stdint.h>
+
+/**
+ * WOTS key generation. Takes a 32 byte seed for the private key, expands it to
+ * a full WOTS private key and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys)
+ * and the address of this WOTS key pair.
+ *
+ * Writes the computed public key to 'pk'.
+ */
+void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_gen_pk(
+    unsigned char *pk, const unsigned char *sk_seed,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+/**
+ * Takes a n-byte message and the 32-byte seed for the private key to compute a
+ * signature that is placed at 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256192SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/META.yml b/crypto_sign/sphincs/sphincs-sha256-256f-robust/META.yml new file mode 100644 index 00000000..aa444d0c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 49216 +testvectors-sha256: 14dd19ba3ff75bad890949050289ab0f178d7baa6dcb8ff6bcd6a873692a5686 +nistkat-sha256: e6fafb97dc3575d5dcd79183a4d7faad4f2c986745c63e61ddae3648559664f7 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..6054b4cf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256f-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj hash_sha256x8.obj thash_sha256_robustx8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h
+
+$(LIBRARY): $(OBJECTS)
+    LIB.EXE /NOLOGO /WX /OUT:$@ $**
+
+clean:
+    -DEL $(OBJECTS)
+    -DEL $(LIBRARY)
diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/address.c
new file mode 100644
index 00000000..3139a2f3
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/address.c
@@ -0,0 +1,78 @@
+#include <stdint.h>
+
+#include "address.h"
+#include "params.h"
+#include "utils.h"
+
+/*
+ * Address word layout used by the setters in this file:
+ *   addr[0]      layer
+ *   addr[1..3]   tree (addr[1] is always 0, addr[2] high word, addr[3] low word)
+ *   addr[4]      type
+ *   addr[5]      key pair
+ *   addr[6]      chain address, or tree height for hash tree addresses
+ *   addr[7]      hash address, or tree index for hash tree addresses
+ */
+
+/* Serializes the eight address words into 32 bytes, 4 bytes per word. */
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_addr_to_bytes(
+    unsigned char *bytes, const uint32_t addr[8]) {
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(
+            bytes + i * 4, 4, addr[i]);
+    }
+}
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_layer_addr(
+    uint32_t addr[8], uint32_t layer) {
+    addr[0] = layer;
+}
+
+/* Stores the 64-bit tree index in addr[2] (high half) and addr[3] (low half);
+   addr[1] is cleared. */
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_addr(
+    uint32_t addr[8], uint64_t tree) {
+    addr[1] = 0;
+    addr[2] = (uint32_t) (tree >> 32);
+    addr[3] = (uint32_t) tree;
+}
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type(
+    uint32_t addr[8], uint32_t type) {
+    addr[4] = type;
+}
+
+/* Copies words 0..3 (layer and tree) and leaves the rest of 'out' untouched. */
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_subtree_addr(
+    uint32_t out[8], const uint32_t in[8]) {
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+}
+
+/* These functions are used for OTS addresses. */
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr(
+    uint32_t addr[8], uint32_t keypair) {
+    addr[5] = keypair;
+}
+
+/* Copies words 0..3 (layer and tree) and word 5 (key pair); the type word
+   addr[4] and words 6..7 of 'out' are left untouched. */
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr(
+    uint32_t out[8], const uint32_t in[8]) {
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+    out[5] = in[5];
+}
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_chain_addr(
+    uint32_t addr[8], uint32_t chain) {
+    addr[6] = chain;
+}
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_hash_addr(
+    uint32_t addr[8], uint32_t hash) {
+    addr[7] = hash;
+}
+
+/* These functions are used for all hash tree addresses (including FORS). */
+
+/* Shares word 6 with the chain address. */
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height(
+    uint32_t addr[8], uint32_t tree_height) {
+    addr[6] = tree_height;
+}
+
+/* Shares word 7 with the hash address. */
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(
+    uint32_t addr[8], uint32_t tree_index) {
+    addr[7] = tree_index;
+}
diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/address.h
new file mode 100644
index 00000000..aa353124
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/address.h
@@ -0,0 +1,50 @@
+#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDRESS_H
+#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDRESS_H
+
+#include <stdint.h>
+
+#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTS 0
+#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTSPK 1
+#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE 2
+#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE 3
+#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSPK 4
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_addr_to_bytes(
+    unsigned char *bytes, const uint32_t addr[8]);
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_layer_addr(
+    uint32_t addr[8], uint32_t layer);
+
+void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_addr(
+    uint32_t addr[8], uint64_t tree);
+
+void
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h new file mode 100644 index 00000000..8d570365 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/fors.c new file mode 100644 index 00000000..b64a28cd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/fors.h new file mode 100644 index 00000000..73730f3d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash.h new file mode 100644 index 00000000..617163d4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_sha256.c new file mode 100644 index 00000000..1e609a52 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, m, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, pk, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_sha256x8.c new file mode 100644 index 00000000..a8ebd596 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_state.h new file mode 100644 index 00000000..c6161f01 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hashx8.h new file mode 100644 index 00000000..098c68b8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/params.h new file mode 100644 index 00000000..80770eb9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FULL_HEIGHT 68 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N / PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256.c new file mode 100644 index 00000000..77876318 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256.h new file mode 100644 index 00000000..16646e77 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256avx.c new file mode 100644 index 00000000..fe3cb4d7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256avx.h new file mode 100644 index 00000000..d2c9d867 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256x8.c new file mode 100644 index 00000000..072faa06 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256x8.h new file mode 100644 index 00000000..12100f35 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sign.c new file mode 100644 index 00000000..493c12e1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES]; /* scratch: filled by wots_gen_pk below, then hashed into leaf */ + uint32_t wots_addr[8] = {0}; /* address passed to wots_gen_pk */ + uint32_t wots_pk_addr[8] = {0}; /* address passed to thash_WOTS_LEN */ + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTSPK); + /* Build the WOTS address from tree_addr and addr_idx, then generate the WOTS public key into pk. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + /* Hash the WOTS public key in pk into the single value written to leaf. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes (the constant PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES). + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes (the constant PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES). + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes (the constant PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_BYTES). + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes (the constant PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SEEDBYTES). + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. auth_path is therefore written by treehash below and never read afterwards. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + /* Address the hash tree on the top layer (layer index D - 1). */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed: the first CRYPTO_SEEDBYTES bytes of sk are a verbatim copy of seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SEEDBYTES); + /* pk[0..N) = PUB_SEED, read back from sk at offset 2*N. */ + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. The root is written straight into sk at offset 3*N; the leaf index and index offset arguments are both 0. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + /* pk[N..2N) = root, copied from sk at offset 3*N. */ + memcpy(pk + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; /* there is no failure path: always returns 0 */ +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SEEDBYTES]; /* NOTE(review): holds the secret seed material and is not wiped before return */ + randombytes(seed, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + /* Any values returned by the two calls above are ignored; this function always returns 0. */ + return 0; +} + +/** + * Writes a detached signature of the mlen-byte message m under secret key sk to sig and stores the signature length in *siglen; always returns 0. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; /* sk[0..N) */ + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; /* sk[N..2N) */ + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; /* the public key (PUB_SEED || root) is embedded in sk from offset 2*N */ + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; /* filled by randombytes below */ + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_MSG_BYTES]; /* message digest written by hash_message */ + unsigned char root[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; /* value signed at the current layer: first written by fors_sign, then by each treehash call */ + uint32_t i; + uint64_t tree; /* tree index, set by hash_message */ + uint32_t idx_leaf; /* leaf index, set by hash_message */ + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT) - 1)); /* low TREE_HEIGHT bits of tree become the leaf index for the next layer */ + tree = tree >> PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT; /* the remaining high bits become the tree index for the next layer */ + } + /* The reported signature length is the fixed constant PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES. */ + *siglen = PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies the detached signature sig (siglen bytes) over the mlen-byte message m under public key pk. Returns 0 when the signature is valid, and -1 when siglen is not PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES or the recomputed root differs from the root stored in pk. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; /* pk[0..N) */ + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; /* pk[N..2N) */ + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + /* Reject a wrong-length signature before anything is read from sig. */ + if (siglen != PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. Verification passes NULL where the signing path passes sk_seed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; /* step past the authentication path just consumed */ + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key; any difference means the signature is rejected with -1. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Writes the signature followed by a copy of the mlen-byte message m to sm and stores the total length in *smlen; always returns 0. NOTE(review): the signature is written to sm before m is copied, so m should not overlap the first PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES bytes of sm. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + /* Append the message right after the fixed-size signature. */ + memmove(sm + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies the signed message sm (smlen bytes: signature followed by message) under public key pk. On success copies the message to m, stores its length in *mlen and returns 0; on failure zeroes smlen bytes of m, sets *mlen to 0 and returns -1. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES) { + memset(m, 0, smlen); /* clears smlen bytes, so m must have room for at least smlen bytes */ + *mlen = 0; + return -1; + } + /* Everything after the fixed-size signature is the message. */ + *mlen = smlen - PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); /* on a rejected signature the output buffer is zeroed, again over smlen bytes */ + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. memmove is used, so m may overlap sm. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash.h new file mode 100644 index 00000000..7813c73c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash_sha256_robust.c new file mode 100644 index 
00000000..09014cc9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash_sha256_robust.c @@ -0,0 +1,78 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N bytes in 'in' and writes PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N bytes of hash output to 'out'. 'buf' is caller-provided scratch of at least N + SHA256_ADDR_BYTES + 4 + inblocks * N bytes; its contents are overwritten. + */ +static void thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; /* full digest; only its first N bytes are copied to out */ + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + 4; /* mask region starts 4 bytes past where the masked input is written */ + sha256ctx sha2_state; + unsigned int i; + /* buf starts with pub_seed followed by the compressed address. */ + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf'. The mask is derived from the first N + SHA256_ADDR_BYTES bytes of buf and is inblocks * N bytes long. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + /* Mask the input in place inside buf. The write position trails bitmask by 4 bytes, so with ascending i every mask byte is read before it can be overwritten. */ + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + /* Finish the hash over compressed address || masked input on top of the cloned state, then copy the first N digest bytes to out. */ + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + /* Hashes 1 block of N bytes from in; buf is the fixed-size scratch thash() needs for inblocks = 1. */ + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + thash(out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + /* Hashes 2 blocks of N bytes from in; buf is the fixed-size scratch thash() needs for inblocks = 2. */ + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + thash(out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + /* Hashes WOTS_LEN blocks of N bytes from in; buf is the fixed-size scratch thash() needs for inblocks = WOTS_LEN. */ + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + /* Hashes FORS_TREES blocks of N bytes from in; buf is the fixed-size scratch thash() needs for inblocks = FORS_TREES. */ + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash_sha256_robustx8.c 
b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash_sha256_robustx8.c new file mode 100644 index 00000000..e9a1acb2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thash_sha256_robustx8.c @@ -0,0 +1,156 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + uint8_t *out4, + uint8_t *out5, + uint8_t *out6, + uint8_t *out7, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + const uint8_t *in4, + const uint8_t *in5, + const uint8_t *in6, + const uint8_t *in7, + unsigned int inblocks, + const uint8_t *pub_seed, + uint32_t addrx8[8 * 8], + uint8_t *bufx8, + uint8_t *bitmaskx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. 
*/ + + for (i = 0; i < 8; i++) { + memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + pub_seed, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + + i * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + addrx8 + i * 8); + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + ); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; i++) { + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 0 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 1 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 2 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 3 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 4 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in4[i] ^ bitmaskx8[i + 4 * (inblocks * 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 5 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 6 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + i + + 7 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)] = + in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; + } + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_update8x(&ctx, + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + 
PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N), + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out3, 
outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); +} + +#define thash_size_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; \ + uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ + } + +thash_size_variant(1, 1) +thash_size_variant(2, 2) +thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN) +thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_TREES) + +#undef thash_size_variant diff --git 
a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thashx8.h new file mode 100644 index 00000000..40b4bada --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utils.c new file mode 100644 index 00000000..1730ace5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const 
unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utils.h new file mode 100644 index 00000000..fa75f953 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utilsx8.c new file 
mode 100644 index 00000000..6688c669 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utilsx8.h new file mode 100644 index 00000000..f94033d6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/wots.c new file mode 100644 index 00000000..7dd41319 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/wots.h new file mode 100644 index 00000000..d6e440ba --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..e38bc824 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256f-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/address.c new file mode 100644 index 00000000..8bae3cf3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/address.h new file mode 100644 index 00000000..7add74b4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/api.h new file mode 100644 index 00000000..4380b40d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/fors.c new file mode 100644 index 00000000..4852f81e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/fors.h new file mode 100644 index 00000000..a0965162 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash.h new file mode 100644 index 00000000..daaf116c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash_sha256.c new file mode 100644 index 00000000..ccd81976 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* For SHA256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N space + * available in front of the pointer, i.e. before the message to use for the + * prefix. 
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, m, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/params.h new file mode 100644 index 00000000..0f13fb0c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FULL_HEIGHT 68 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N / PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sha256.c new file mode 100644 index 00000000..92228374 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sha256.h new file mode 100644 index 00000000..52234c29 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sign.c new file mode 100644 index 00000000..a419565f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/thash.h new file mode 100644 index 00000000..561e6510 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/thash_sha256_robust.c new file mode 100644 
index 00000000..3e33b1ff --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/thash_sha256_robust.c @@ -0,0 +1,82 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the 
stack */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash( + out, buf, in, 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/utils.c new file mode 100644 index 00000000..28fff858 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/utils.h new file mode 100644 index 00000000..3e263018 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/wots.c new 
file mode 100644 index 00000000..3f0188e9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/wots.h new file mode 100644 index 00000000..284b8f15 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/META.yml b/crypto_sign/sphincs/sphincs-sha256-256f-simple/META.yml new file mode 100644 index 00000000..dada1ee3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 49216 +testvectors-sha256: b4755edf8351c51225921af38a724d2bd9ff9f3afe4ae2abbc3a59763ecc897d +nistkat-sha256: 88fa150041ce9c305a971cef8ec444881afc14c4590637fa4b91c1deb15bb215 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..769d1e94 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256f-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj hash_sha256x8.obj thash_sha256_simplex8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/address.c new file mode 100644 index 00000000..9d58797e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/address.h new file mode 100644 index 00000000..648b05e1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h new file mode 100644 index 00000000..1d7945c4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/fors.c new file mode 100644 index 00000000..13e15ca2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/fors.h new file mode 100644 index 00000000..cce6895b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash.h new file mode 100644 index 00000000..0e007773 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_sha256.c new file mode 100644 index 00000000..230f8fc7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, m, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, pk, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_sha256x8.c new file mode 100644 index 00000000..bbd1fda0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_state.h new file mode 100644 index 00000000..eb72f064 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hashx8.h new file mode 100644 index 00000000..d7d629fe --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/params.h new file mode 100644 index 00000000..68350c37 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FULL_HEIGHT 68 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256.c new file mode 100644 index 00000000..4020f004 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256.h new file mode 100644 index 00000000..f78d5384 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256avx.c new file mode 100644 index 00000000..72cbf7ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256avx.h new file mode 100644 index 00000000..9089b512 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256x8.c new file mode 100644 index 00000000..3c2995aa --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256x8.h new file mode 100644 index 00000000..12aad90e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sign.c new file mode 100644 index 00000000..9fe22a88 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash.h new file mode 100644 index 00000000..bbb9b7e8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash_sha256_simple.c new file mode 100644 index 
00000000..7f56f862 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state 
*hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash_sha256_simplex8.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash_sha256_simplex8.c new file mode 100644 index 00000000..56297738 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thash_sha256_simplex8.c @@ -0,0 +1,129 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned 
char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + uint8_t *bufx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + addrx8 + i * 8); + } + + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 0 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 1 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 2 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 3 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 4 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 5 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 6 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + + 7 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_update8x(&ctx, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * 
PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); +} + +#define thashx8_variant_impl(name, size) \ + void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char 
*out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, state_seeded); \ + } + +thashx8_variant_impl(1, 1) +thashx8_variant_impl(2, 2) +thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN) +thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES) + +#undef thashx8_variant_impl diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thashx8.h new file mode 100644 index 00000000..d8a5a8da --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const 
unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utils.c new file mode 100644 index 00000000..1cfe73c5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + 
unsigned int heights[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utils.h new file mode 100644 index 00000000..37037bf3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utilsx8.c new file mode 100644 index 00000000..ede121a8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utilsx8.h new file mode 100644 index 00000000..9c5e5366 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/wots.c new file mode 100644 index 00000000..57013022 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/wots.h new file mode 100644 index 00000000..4c1af89f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..67ebaea2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256f-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/address.c new file mode 100644 index 00000000..29f4ff70 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/address.h new file mode 100644 index 00000000..079c0153 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/api.h new file mode 100644 index 00000000..e2c304f3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/fors.c new file mode 100644 index 00000000..8794e3e9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/fors.h new file mode 100644 index 00000000..bb35a141 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash.h new file mode 100644 index 00000000..2a473aa7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash_sha256.c new file mode 100644 index 00000000..a35f2ac0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* For SHA256, initialization only calls seed_state on hash_state_seeded + with pub_seed; sk_seed is not used. */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * The optrand prefix and the leading message bytes are staged in the local + * buffer buf, so m is only read and needs no free space in front of it. + * A short message is hashed entirely from buf. 
A longer one has its remaining bytes hashed + * directly from m, so the message is never moved or copied as a whole. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, m, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/params.h new file mode 100644 index 00000000..ee98a9f4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FULL_HEIGHT 68 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D 17 +/* FORS tree dimensions: height of each tree, then number of trees. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES 30 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* Size in bytes of a hash address (eight 32-bit words); named for clarity. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute it: with len_1 = 64 and w = 16 this is floor(log(960) / log(16)) + 1 = 3 */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size: the hypertree height divided evenly over its D layers. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D) + +/* FORS parameters: digest bytes consumed by FORS (FORS_HEIGHT bits per tree, rounded up to whole bytes), bytes the FORS signature occupies in a signature, and FORS public key size. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + +/* Resulting SPX sizes.
*/ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sha256.c new file mode 100644 index 00000000..2c3ff801 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sha256.h new file mode 100644 index 00000000..61bc4374 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sign.c new file mode 100644 index 00000000..5942d0a7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/thash.h new file mode 100644 index 00000000..2de4347f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/thash_sha256_simple.c new file mode 100644 
index 00000000..333a9712 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const 
sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/utils.c new file mode 100644 index 00000000..6dc29c05 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; /* number of nodes currently held on the stack */ + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < ((uint32_t)1 << tree_height); idx++) { /* leaf count is shifted as uint32_t, never as a signed int */ + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; /* a freshly generated leaf sits at height 0 */ + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { /* idx is the sibling of leaf_idx: it is the first auth path entry */ + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together.
*/ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/utils.h new file mode 100644 index 00000000..6b35879b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/wots.c new 
file mode 100644 index 00000000..0cc6e964 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { /* previous byte fully consumed: load the next input byte */ + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LOGW; /* digits are taken from the most significant end of the byte first */ + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over the WOTS_LEN1 base_w message digits and writes it to csum_base_w as WOTS_LEN2 base_w digits. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; /* checksum bits rounded up to whole bytes */ + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 keeps the shift at 0 (not 8) when LEN2 * LOGW is already a multiple of 8. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths: WOTS_LEN1 message digits followed by the checksum digits. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN; i++) { /* one chain per N-byte slot of pk */ + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); /* chain start value is written into slot i */ + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); /* hashed in place from position 0 for W - 1 steps */ + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'; the chain and hash fields of addr are overwritten along the way. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); /* per-chain step counts: message digits followed by checksum digits */ + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); /* chain start value is written into slot i */ + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); /* hashed in place for lengths[i] steps */ + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/wots.h new file mode 100644 index 00000000..124fe5be --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256f-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/META.yml b/crypto_sign/sphincs/sphincs-sha256-256s-robust/META.yml new file mode 100644 index 00000000..7490886d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 29792 +testvectors-sha256: 6a85ec1f64d017fc2ffd88aa7d679de7e0554e00bdea62c7fea5c4c403e3eafa +nistkat-sha256: da28ff350ac552f100b35b01ecb494dc02f9dcf542fa2d88439cd427985e9581 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M.
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..ac9c3743 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256s-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj hash_sha256x8.obj thash_sha256_robustx8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/address.c new file mode 100644 index 00000000..d9f2f525 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/address.h new file mode 100644 index 00000000..4c046f9c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h new file mode 100644 index 00000000..e2a4910c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/fors.c new file mode 100644 index 00000000..4be789b0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
+ */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/fors.h new file mode 100644 index 00000000..3d286c96 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_H + +#include <stdint.h> + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash.h new file mode 100644 index 00000000..61fe75d7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_sha256.c new file mode 100644 index 00000000..fffb80d3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N space + * available in front of the pointer, i.e. 
before the message to use for the + * prefix. This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, m, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, pk, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_sha256x8.c new file mode 100644 index 00000000..a57b923f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_state.h new file mode 100644 index 00000000..47470f23 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hashx8.h new file mode 100644 index 00000000..30d23216 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_HASHX8_H + +#include <stdint.h> + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/params.h new file mode 100644 index 00000000..80f9071c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES 22 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N / PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256.c new file mode 100644 index 00000000..b7a64263 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256.h new file mode 100644 index 00000000..f627451b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256avx.c new file mode 100644 index 00000000..02fd73ce --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) { + memcpy(outctx, inctx, sizeof(sha256ctxx8)); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx) { + ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667); + ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85); + ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372); + ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a); + ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f); + ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c); + ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab); + ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19); + + ctx->datalen = 0; + ctx->msglen = 0; +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len) { + size_t i = 0; + size_t bytes_to_copy; + + while (i < len) { + bytes_to_copy = (size_t)len - i; + if (bytes_to_copy > 64) 
{ + bytes_to_copy = 64; + } + memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 3], d3 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 4], d4 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 5], d5 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 6], d6 + i, bytes_to_copy); + memcpy(&ctx->msgblocks[64 * 7], d7 + i, bytes_to_copy); + ctx->datalen += (unsigned int)bytes_to_copy; + i += bytes_to_copy; + if (ctx->datalen == 64) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + ctx->msglen += 512; + ctx->datalen = 0; + } + } +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7) { + unsigned int i, curlen; + + // Padding + if (ctx->datalen < 56) { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + } else { + for (i = 0; i < 8; ++i) { + curlen = ctx->datalen; + ctx->msgblocks[64 * i + curlen++] = 0x80; + while (curlen < 64) { + ctx->msgblocks[64 * i + curlen++] = 0x00; + } + } + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + memset(ctx->msgblocks, 0, 8 * 64); + } + + // Add length of the message to each block + ctx->msglen += ctx->datalen * 8; + for (i = 0; i < 8; i++) { + ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen; + ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8); + ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16); + ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24); + ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32); + ctx->msgblocks[64 * i + 
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256avx.h new file mode 100644 index 00000000..17b36890 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256x8.c new file mode 100644 index 00000000..b62aa76f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7); +} + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1x8( + unsigned char *outx8, + unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen) { + unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES) + 4)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + unsigned int j; + + memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen); + memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen); + memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen); + memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen); + memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen); + memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen); + memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen); + memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen); + + /* While we can fit in at least another full block of SHA256 output.. 
*/ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256x8(outx8 + 0 * outlen, + outx8 + 1 * outlen, + outx8 + 2 * outlen, + outx8 + 3 * outlen, + outx8 + 4 * outlen, + outx8 + 5 * outlen, + outx8 + 6 * outlen, + outx8 + 7 * outlen, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + outx8 += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i); + } + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + inbufx8 + 0 * (inlen + 4), + inbufx8 + 1 * (inlen + 4), + inbufx8 + 2 * (inlen + 4), + inbufx8 + 3 * (inlen + 4), + inbufx8 + 4 * (inlen + 4), + inbufx8 + 5 * (inlen + 4), + inbufx8 + 6 * (inlen + 4), + inbufx8 + 7 * (inlen + 4), inlen + 4); + + for (j = 0; j < 8; j++) { + memcpy(outx8 + j * outlen, + outbufx8 + j * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outlen - i * 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256x8.h new file mode 100644 index 00000000..9d45c625 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sha256x8.h @@ -0,0 +1,44 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256X8_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256X8_H + +#include "sha256avx.h" + +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N */ + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen); + +/** + * Note that inlen should be sufficiently small that it still allows for + * an array to be allocated on the stack. Typically 'in' is merely a seed. 
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sign.c new file mode 100644 index 00000000..a7b839d0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash.h new file mode 100644 index 00000000..8db771a0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash_sha256_robust.c new file mode 100644 index 
00000000..b8b0bdc6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash_sha256_robust.c @@ -0,0 +1,78 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N bytes. + */ +static void thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + thash(out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + thash(out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + thash(out, buf, in, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash_sha256_robustx8.c 
b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash_sha256_robustx8.c new file mode 100644 index 00000000..14b8e19c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thash_sha256_robustx8.c @@ -0,0 +1,156 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(uint8_t *out0, + uint8_t *out1, + uint8_t *out2, + uint8_t *out3, + uint8_t *out4, + uint8_t *out5, + uint8_t *out6, + uint8_t *out7, + const uint8_t *in0, + const uint8_t *in1, + const uint8_t *in2, + const uint8_t *in3, + const uint8_t *in4, + const uint8_t *in5, + const uint8_t *in6, + const uint8_t *in7, + unsigned int inblocks, + const uint8_t *pub_seed, + uint32_t addrx8[8 * 8], + uint8_t *bufx8, + uint8_t *bitmaskx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. 
*/ + + for (i = 0; i < 8; i++) { + memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + pub_seed, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + + i * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + addrx8 + i * 8); + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + ); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; i++) { + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 0 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 1 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 2 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 3 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 4 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in4[i] ^ bitmaskx8[i + 4 * (inblocks * 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 5 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 6 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + bufx8[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + i + + 7 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)] = + in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; + } + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_update8x(&ctx, + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + 
PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + bufx8 + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N), + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out3, 
outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); +} + +#define thash_size_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; \ + uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ + } + +thash_size_variant(1, 1) +thash_size_variant(2, 2) +thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN) +thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_TREES) + +#undef thash_size_variant diff --git 
a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thashx8.h new file mode 100644 index 00000000..cf5c0616 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utils.c new file mode 100644 index 00000000..943db620 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const 
unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utils.h new file mode 100644 index 00000000..fbf19ec2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utilsx8.c new file 
mode 100644 index 00000000..7a7843f9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utilsx8.h new file mode 100644 index 00000000..6e27f56d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/wots.c new file mode 100644 index 00000000..b665943e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/wots.h new file mode 100644 index 00000000..404bb9a1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include 
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..b21f15f2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256s-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_robust.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
/*
 * Serializes the eight 32-bit words of 'addr' into 'bytes', four bytes per
 * word, in word order. 'bytes' must have room for 32 bytes. The byte order
 * inside each word is whatever ull_to_bytes produces.
 */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_addr_to_bytes(
    unsigned char *bytes, const uint32_t addr[8]) {
    unsigned char *dst = bytes;
    int word;

    for (word = 0; word < 8; word++, dst += 4) {
        PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(dst, 4, addr[word]);
    }
}

/* Stores the layer number in word 0 of the address. */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_layer_addr(
    uint32_t addr[8], uint32_t layer) {
    addr[0] = layer;
}

/*
 * Stores the 64-bit tree index in words 2 (high half) and 3 (low half) and
 * clears word 1.
 */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_addr(
    uint32_t addr[8], uint64_t tree) {
    addr[3] = (uint32_t) tree;
    addr[2] = (uint32_t) (tree >> 32);
    addr[1] = 0;
}

/* Stores the address type (one of the ADDR_TYPE_* values) in word 4. */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type(
    uint32_t addr[8], uint32_t type) {
    addr[4] = type;
}

/* Copies the layer and tree part (words 0..3) of 'in' into 'out'. */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_subtree_addr(
    uint32_t out[8], const uint32_t in[8]) {
    int word;

    for (word = 0; word < 4; word++) {
        out[word] = in[word];
    }
}
/* These functions are used for OTS addresses. */

/* Stores the key pair index in word 5 of the address. */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr(
    uint32_t addr[8], uint32_t keypair) {
    addr[5] = keypair;
}

/*
 * Copies the layer/tree part (words 0..3) and the key pair index (word 5)
 * of 'in' into 'out'. Words 4, 6 and 7 of 'out' are left untouched.
 */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr(
    uint32_t out[8], const uint32_t in[8]) {
    int word;

    for (word = 0; word < 4; word++) {
        out[word] = in[word];
    }
    out[5] = in[5];
}

/* Stores the chain index in word 6 of the address. */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_chain_addr(
    uint32_t addr[8], uint32_t chain) {
    addr[6] = chain;
}

/* Stores the hash index in word 7 of the address. */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_hash_addr(
    uint32_t addr[8], uint32_t hash) {
    addr[7] = hash;
}

/* These functions are used for all hash tree addresses (including FORS). */

/* Stores the tree height in word 6 (the same word set_chain_addr writes). */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height(
    uint32_t addr[8], uint32_t tree_height) {
    addr[6] = tree_height;
}

/* Stores the tree index in word 7 (the same word set_hash_addr writes). */
void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index(
    uint32_t addr[8], uint32_t tree_index) {
    addr[7] = tree_index;
}
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/api.h new file mode 100644 index 00000000..8d9f42ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/fors.c new file mode 100644 index 00000000..b6c5f1cd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/fors.h new file mode 100644 index 00000000..1e0417d0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash.h new file mode 100644 index 00000000..20f35f91 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash_sha256.c new file mode 100644 index 00000000..a8264f8a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* For SHA256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N space + * available in front of the pointer, i.e. before the message to use for the + * prefix. 
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, m, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/params.h new file mode 100644 index 00000000..2f6355fc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES 22 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N / PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sha256.c new file mode 100644 index 00000000..df421b00 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sha256.h new file mode 100644 index 00000000..4af5b6b8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sign.c new file mode 100644 index 00000000..3585144f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/thash.h new file mode 100644 index 00000000..60ea0ded --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/thash_sha256_robust.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/thash_sha256_robust.c new file mode 100644 
index 00000000..88c9e088 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/thash_sha256_robust.c @@ -0,0 +1,82 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + 4; + sha256ctx sha2_state; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, addr); + /* MGF1 requires us to have 4 extra bytes in 'buf' */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_mgf1(bitmask, inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES); + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + sha256_inc_finalize(outbuf, &sha2_state, buf + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the 
stack */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 1 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + 2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_SHA256_ADDR_BYTES + 4 + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash( + out, buf, in, 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/utils.c new file mode 100644 index 00000000..95aeaa60 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/utils.h new file mode 100644 index 00000000..6ef41bd0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/wots.c new 
file mode 100644 index 00000000..95ac0049 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/wots.h new file mode 100644 index 00000000..36adc075 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/META.yml b/crypto_sign/sphincs/sphincs-sha256-256s-simple/META.yml new file mode 100644 index 00000000..091310f0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 29792 +testvectors-sha256: 796b5101fa5170c92f0186b347716dc0662eac35002a8c4d80ac9283cbef5a02 +nistkat-sha256: 768d61c537b3abacca3ab468623edafb33d28a33dc5a9859f803679a3020b639 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..b276945c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256s-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx8.obj sha256avx.obj sha256x8.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj hash_sha256x8.obj thash_sha256_simplex8.obj sha256.obj + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/address.c new file mode 100644 index 00000000..76b83ad3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/address.h new file mode 100644 index 00000000..7de40b64 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h new file mode 100644 index 00000000..d5b5e56e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/fors.c new file mode 100644 index 00000000..ff492c58 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/fors.c @@ -0,0 +1,240 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx8.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "utilsx8.h" + +static void fors_gen_skx8(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, + unsigned char *sk4, + unsigned char *sk5, + unsigned char *sk6, + unsigned char *sk7, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx8[8 * 8]) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addrx8(sk0, sk1, sk2, sk3, sk4, sk5, sk6, sk7, + sk_seed, fors_leaf_addrx8); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *sk4, + const unsigned char *sk5, + const unsigned char *sk6, + const unsigned char *sk7, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx8[8 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thashx8_1(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk0, sk1, sk2, sk3, sk4, sk5, sk6, 
sk7, + pub_seed, fors_leaf_addrx8, state_seeded); +} + +static void fors_gen_leafx8(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + unsigned char *leaf4, + unsigned char *leaf5, + unsigned char *leaf6, + unsigned char *leaf7, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + uint32_t addr_idx4, + uint32_t addr_idx5, + uint32_t addr_idx6, + uint32_t addr_idx7, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx8[8 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx8. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx8 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type(fors_leaf_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 3 * 8, addr_idx3); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 4 * 8, addr_idx4); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 5 * 8, addr_idx5); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 6 * 8, addr_idx6); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx8 + 7 * 8, addr_idx7); + + fors_gen_skx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + sk_seed, fors_leaf_addrx8); + fors_sk_to_leafx8(leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + leaf0, leaf1, leaf2, leaf3, leaf4, leaf5, leaf6, leaf7, + pub_seed, fors_leaf_addrx8, 
state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + /* Round up to multiple of 8 to prevent out-of-bounds for x8 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES + 7) & ~7] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES + 7) & ~7) * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 8 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx8[8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx8[8 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[8] = {0}; + unsigned int i, j; + + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx8 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type(fors_tree_addrx8 + j * 8, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height(fors_tree_addrx8 + j * 8, 0); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_tree_addrx8 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx8(sigbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx8); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx8, fors_tree_addrx8, + state_seeded); + + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx8 + j * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sigbufx8 + 8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/fors.h new file mode 100644 index 00000000..5565fcb1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash.h new file mode 100644 index 00000000..a44261f8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_sha256.c new file mode 100644 index 00000000..4a48a99e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_sha256.c @@ -0,0 +1,166 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include 
"utils.h" + +#include "sha2.h" +#include "sha256.h" +#include "sha256x8.h" + +/** + * Initializes the hash function states + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_seed_state(&hash_state_seeded->x1, pub_seed); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_seed_statex8(&hash_state_seeded->x8, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/** + * Cleans up the hash function states + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(&hash_state_seeded->x1); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addr(unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compress_address(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * m is only read: optrand and the leading message bytes are copied into a + * local buffer, so no spare space is required in front of the pointer, i.e.
before the message, and the caller's message is never moved or + * modified. Any message bytes past the first buffered block are hashed + * directly from m. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, 0x36, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + mlen < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, m, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + mlen -= PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i <
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; i++) { + buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, 0x5c, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split into + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES + 4]; + + /* Round up to a multiple of PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS (((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N +
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, pk, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES 
- PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_mgf1(bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_sha256x8.c new file mode 100644 index 00000000..a091d6bb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_sha256x8.c @@ -0,0 +1,61 @@ +#include +#include + +#include "address.h" +#include "hashx8.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +/* + * 8-way parallel version of prf_addr; takes 8x as much input and output + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]) { + unsigned char bufx8[8 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES)]; + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int j; + + for (j = 0; j < 8; j++) { + memcpy(bufx8 + j * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), key, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + addrx8 + j * 8); + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * 
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES), + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + 
memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_state.h new file mode 100644 index 00000000..30777c2d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hash_state.h @@ -0,0 +1,33 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * We use a struct to differentiate between the x1 and x8 variants of SHA256. 
+ */ + +#include "sha2.h" +#include "sha256avx.h" + +typedef struct { + sha256ctx x1; + sha256ctxx8 x8; +} hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hashx8.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hashx8.h new file mode 100644 index 00000000..a5ca75b0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/hashx8.h @@ -0,0 +1,19 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_HASHX8_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_HASHX8_H + +#include + +#include "params.h" + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addrx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *key, + const uint32_t addrx8[8 * 8]); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/params.h new file mode 100644 index 00000000..78a58506 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES 22 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256.c new file mode 100644 index 00000000..34a43491 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256.h new file mode 100644 index 00000000..88b06b9c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long 
inlen); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256avx.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256avx.c new file mode 100644 index 00000000..a1bd5d95 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256avx.c @@ -0,0 +1,296 @@ +#include +#include +#include + +#include "sha256avx.h" + +// Transpose 8 vectors containing 32-bit values +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_transpose(u256 s[8]) { + u256 tmp0[8]; + u256 tmp1[8]; + tmp0[0] = _mm256_unpacklo_epi32(s[0], s[1]); + tmp0[1] = _mm256_unpackhi_epi32(s[0], s[1]); + tmp0[2] = _mm256_unpacklo_epi32(s[2], s[3]); + tmp0[3] = _mm256_unpackhi_epi32(s[2], s[3]); + tmp0[4] = _mm256_unpacklo_epi32(s[4], s[5]); + tmp0[5] = _mm256_unpackhi_epi32(s[4], s[5]); + tmp0[6] = _mm256_unpacklo_epi32(s[6], s[7]); + tmp0[7] = _mm256_unpackhi_epi32(s[6], s[7]); + tmp1[0] = _mm256_unpacklo_epi64(tmp0[0], tmp0[2]); + tmp1[1] = _mm256_unpackhi_epi64(tmp0[0], tmp0[2]); + tmp1[2] = _mm256_unpacklo_epi64(tmp0[1], tmp0[3]); + tmp1[3] = _mm256_unpackhi_epi64(tmp0[1], tmp0[3]); + tmp1[4] = _mm256_unpacklo_epi64(tmp0[4], tmp0[6]); + tmp1[5] = _mm256_unpackhi_epi64(tmp0[4], tmp0[6]); + tmp1[6] = _mm256_unpacklo_epi64(tmp0[5], tmp0[7]); + tmp1[7] = _mm256_unpackhi_epi64(tmp0[5], tmp0[7]); + s[0] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x20); + s[1] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x20); + s[2] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x20); + s[3] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x20); + s[4] = _mm256_permute2x128_si256(tmp1[0], tmp1[4], 0x31); + s[5] = _mm256_permute2x128_si256(tmp1[1], tmp1[5], 0x31); + s[6] = _mm256_permute2x128_si256(tmp1[2], tmp1[6], 0x31); + s[7] = _mm256_permute2x128_si256(tmp1[3], tmp1[7], 0x31); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 
*outctx, const sha256ctxx8 *inctx) {
+    memcpy(outctx, inctx, sizeof(sha256ctxx8));
+}
+
+void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx) {
+    ctx->s[0] = _mm256_set_epi32((int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667, (int)0x6a09e667);
+    ctx->s[1] = _mm256_set_epi32((int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85, (int)0xbb67ae85);
+    ctx->s[2] = _mm256_set_epi32((int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372, (int)0x3c6ef372);
+    ctx->s[3] = _mm256_set_epi32((int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a, (int)0xa54ff53a);
+    ctx->s[4] = _mm256_set_epi32((int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f, (int)0x510e527f);
+    ctx->s[5] = _mm256_set_epi32((int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c, (int)0x9b05688c);
+    ctx->s[6] = _mm256_set_epi32((int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab, (int)0x1f83d9ab);
+    ctx->s[7] = _mm256_set_epi32((int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19, (int)0x5be0cd19);
+
+    ctx->datalen = 0;
+    ctx->msglen = 0;
+}
+
+void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx,
+        const unsigned char *d0,
+        const unsigned char *d1,
+        const unsigned char *d2,
+        const unsigned char *d3,
+        const unsigned char *d4,
+        const unsigned char *d5,
+        const unsigned char *d6,
+        const unsigned char *d7,
+        unsigned long long len) { /* Absorbs len bytes from each of d0..d7 into the eight lanes of ctx; every completed 64-byte block is compressed immediately. */
+    size_t i = 0; /* bytes already consumed from each input */
+    size_t bytes_to_copy;
+
+    while (i < len) {
+        bytes_to_copy = (size_t)len - i;
+        if (bytes_to_copy > 64 - ctx->datalen) { /* never copy past the end of the partially filled block */
+            bytes_to_copy = 64 - ctx->datalen;
+        }
+        memcpy(&ctx->msgblocks[64 * 0 + ctx->datalen], d0 + i, bytes_to_copy); /* append after bytes buffered by an earlier call instead of overwriting them */
+        memcpy(&ctx->msgblocks[64 * 1 + ctx->datalen], d1 + i, bytes_to_copy);
+        memcpy(&ctx->msgblocks[64 * 2 + ctx->datalen], d2 + i, bytes_to_copy);
+        memcpy(&ctx->msgblocks[64 * 3 + ctx->datalen], d3 + i, bytes_to_copy);
+        memcpy(&ctx->msgblocks[64 * 4 + ctx->datalen], d4 + i, bytes_to_copy);
+        memcpy(&ctx->msgblocks[64 * 5 + ctx->datalen], d5 + i, bytes_to_copy);
+        memcpy(&ctx->msgblocks[64 * 6 + ctx->datalen], d6 + i, bytes_to_copy);
+        memcpy(&ctx->msgblocks[64 * 7 + ctx->datalen], d7 + i, bytes_to_copy);
+        ctx->datalen += (unsigned int)bytes_to_copy;
+        i += bytes_to_copy;
+        if (ctx->datalen == 64) { /* block complete in all eight lanes: compress and start a new one */
+            PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks);
+            ctx->msglen += 512; /* msglen counts bits: 64 bytes * 8 */
+            ctx->datalen = 0;
+        }
+    }
+}
+
+void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx,
+        unsigned char *out0,
+        unsigned char *out1,
+        unsigned char *out2,
+        unsigned char *out3,
+        unsigned char *out4,
+        unsigned char *out5,
+        unsigned char *out6,
+        unsigned char *out7) {
+    unsigned int i, curlen;
+
+    // Padding
+    if (ctx->datalen < 56) {
+        for (i = 0; i < 8; ++i) {
+            curlen = ctx->datalen;
+            ctx->msgblocks[64 * i + curlen++] = 0x80;
+            while (curlen < 64) {
+                ctx->msgblocks[64 * i + curlen++] = 0x00;
+            }
+        }
+    } else {
+        for (i = 0; i < 8; ++i) {
+            curlen = ctx->datalen;
+            ctx->msgblocks[64 * i + curlen++] = 0x80;
+            while (curlen < 64) {
+                ctx->msgblocks[64 * i + curlen++] = 0x00;
+            }
+        }
+        PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks);
+        memset(ctx->msgblocks, 0, 8 * 64);
+    }
+
+    // Add length of the message to each block
+    ctx->msglen += ctx->datalen * 8;
+    for (i = 0; i < 8; i++) {
+        ctx->msgblocks[64 * i + 63] = (unsigned char)ctx->msglen;
+        ctx->msgblocks[64 * i + 62] = (unsigned char)(ctx->msglen >> 8);
+        ctx->msgblocks[64 * i + 61] = (unsigned char)(ctx->msglen >> 16);
+        ctx->msgblocks[64 * i + 60] = (unsigned char)(ctx->msglen >> 24);
+        ctx->msgblocks[64 * i + 59] = (unsigned char)(ctx->msglen >> 32);
+        ctx->msgblocks[64 * i +
58] = (unsigned char)(ctx->msglen >> 40); + ctx->msgblocks[64 * i + 57] = (unsigned char)(ctx->msglen >> 48); + ctx->msgblocks[64 * i + 56] = (unsigned char)(ctx->msglen >> 56); + } + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_transform8x(ctx, ctx->msgblocks); + + // Compute final hash output + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_transpose(ctx->s); + + // Store Hash value + STORE(out0, BYTESWAP(ctx->s[0])); + STORE(out1, BYTESWAP(ctx->s[1])); + STORE(out2, BYTESWAP(ctx->s[2])); + STORE(out3, BYTESWAP(ctx->s[3])); + STORE(out4, BYTESWAP(ctx->s[4])); + STORE(out5, BYTESWAP(ctx->s[5])); + STORE(out6, BYTESWAP(ctx->s[6])); + STORE(out7, BYTESWAP(ctx->s[7])); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data) { + u256 s[8], w[64], T0, T1; + int i; + + // Load words and transform data correctly + for (i = 0; i < 8; i++) { + w[i] = BYTESWAP(LOAD(data + 64 * i)); + w[i + 8] = BYTESWAP(LOAD(data + 32 + 64 * i)); + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_transpose(w); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_transpose(w + 8); + + // Initial State + s[0] = ctx->s[0]; + s[1] = ctx->s[1]; + s[2] = ctx->s[2]; + s[3] = ctx->s[3]; + s[4] = ctx->s[4]; + s[5] = ctx->s[5]; + s[6] = ctx->s[6]; + s[7] = ctx->s[7]; + + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 0, w[0]); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 1, w[1]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 2, w[2]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 3, w[3]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 4, w[4]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 5, w[5]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 6, w[6]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 7, w[7]); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 8, w[8]); + SHA256ROUND_AVX(s[7], s[0], s[1], 
s[2], s[3], s[4], s[5], s[6], 9, w[9]); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 10, w[10]); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 11, w[11]); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 12, w[12]); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 13, w[13]); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 14, w[14]); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 15, w[15]); + w[16] = ADD4_32(WSIGMA1_AVX(w[14]), w[0], w[9], WSIGMA0_AVX(w[1])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 16, w[16]); + w[17] = ADD4_32(WSIGMA1_AVX(w[15]), w[1], w[10], WSIGMA0_AVX(w[2])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 17, w[17]); + w[18] = ADD4_32(WSIGMA1_AVX(w[16]), w[2], w[11], WSIGMA0_AVX(w[3])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 18, w[18]); + w[19] = ADD4_32(WSIGMA1_AVX(w[17]), w[3], w[12], WSIGMA0_AVX(w[4])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 19, w[19]); + w[20] = ADD4_32(WSIGMA1_AVX(w[18]), w[4], w[13], WSIGMA0_AVX(w[5])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 20, w[20]); + w[21] = ADD4_32(WSIGMA1_AVX(w[19]), w[5], w[14], WSIGMA0_AVX(w[6])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 21, w[21]); + w[22] = ADD4_32(WSIGMA1_AVX(w[20]), w[6], w[15], WSIGMA0_AVX(w[7])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 22, w[22]); + w[23] = ADD4_32(WSIGMA1_AVX(w[21]), w[7], w[16], WSIGMA0_AVX(w[8])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 23, w[23]); + w[24] = ADD4_32(WSIGMA1_AVX(w[22]), w[8], w[17], WSIGMA0_AVX(w[9])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 24, w[24]); + w[25] = ADD4_32(WSIGMA1_AVX(w[23]), w[9], w[18], WSIGMA0_AVX(w[10])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 25, w[25]); + 
w[26] = ADD4_32(WSIGMA1_AVX(w[24]), w[10], w[19], WSIGMA0_AVX(w[11])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 26, w[26]); + w[27] = ADD4_32(WSIGMA1_AVX(w[25]), w[11], w[20], WSIGMA0_AVX(w[12])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 27, w[27]); + w[28] = ADD4_32(WSIGMA1_AVX(w[26]), w[12], w[21], WSIGMA0_AVX(w[13])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 28, w[28]); + w[29] = ADD4_32(WSIGMA1_AVX(w[27]), w[13], w[22], WSIGMA0_AVX(w[14])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 29, w[29]); + w[30] = ADD4_32(WSIGMA1_AVX(w[28]), w[14], w[23], WSIGMA0_AVX(w[15])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 30, w[30]); + w[31] = ADD4_32(WSIGMA1_AVX(w[29]), w[15], w[24], WSIGMA0_AVX(w[16])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 31, w[31]); + w[32] = ADD4_32(WSIGMA1_AVX(w[30]), w[16], w[25], WSIGMA0_AVX(w[17])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 32, w[32]); + w[33] = ADD4_32(WSIGMA1_AVX(w[31]), w[17], w[26], WSIGMA0_AVX(w[18])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 33, w[33]); + w[34] = ADD4_32(WSIGMA1_AVX(w[32]), w[18], w[27], WSIGMA0_AVX(w[19])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 34, w[34]); + w[35] = ADD4_32(WSIGMA1_AVX(w[33]), w[19], w[28], WSIGMA0_AVX(w[20])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 35, w[35]); + w[36] = ADD4_32(WSIGMA1_AVX(w[34]), w[20], w[29], WSIGMA0_AVX(w[21])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 36, w[36]); + w[37] = ADD4_32(WSIGMA1_AVX(w[35]), w[21], w[30], WSIGMA0_AVX(w[22])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 37, w[37]); + w[38] = ADD4_32(WSIGMA1_AVX(w[36]), w[22], w[31], WSIGMA0_AVX(w[23])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 38, w[38]); + w[39] = ADD4_32(WSIGMA1_AVX(w[37]), 
w[23], w[32], WSIGMA0_AVX(w[24])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 39, w[39]); + w[40] = ADD4_32(WSIGMA1_AVX(w[38]), w[24], w[33], WSIGMA0_AVX(w[25])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 40, w[40]); + w[41] = ADD4_32(WSIGMA1_AVX(w[39]), w[25], w[34], WSIGMA0_AVX(w[26])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 41, w[41]); + w[42] = ADD4_32(WSIGMA1_AVX(w[40]), w[26], w[35], WSIGMA0_AVX(w[27])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 42, w[42]); + w[43] = ADD4_32(WSIGMA1_AVX(w[41]), w[27], w[36], WSIGMA0_AVX(w[28])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 43, w[43]); + w[44] = ADD4_32(WSIGMA1_AVX(w[42]), w[28], w[37], WSIGMA0_AVX(w[29])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 44, w[44]); + w[45] = ADD4_32(WSIGMA1_AVX(w[43]), w[29], w[38], WSIGMA0_AVX(w[30])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 45, w[45]); + w[46] = ADD4_32(WSIGMA1_AVX(w[44]), w[30], w[39], WSIGMA0_AVX(w[31])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 46, w[46]); + w[47] = ADD4_32(WSIGMA1_AVX(w[45]), w[31], w[40], WSIGMA0_AVX(w[32])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 47, w[47]); + w[48] = ADD4_32(WSIGMA1_AVX(w[46]), w[32], w[41], WSIGMA0_AVX(w[33])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 48, w[48]); + w[49] = ADD4_32(WSIGMA1_AVX(w[47]), w[33], w[42], WSIGMA0_AVX(w[34])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 49, w[49]); + w[50] = ADD4_32(WSIGMA1_AVX(w[48]), w[34], w[43], WSIGMA0_AVX(w[35])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 50, w[50]); + w[51] = ADD4_32(WSIGMA1_AVX(w[49]), w[35], w[44], WSIGMA0_AVX(w[36])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 51, w[51]); + w[52] = ADD4_32(WSIGMA1_AVX(w[50]), w[36], w[45], WSIGMA0_AVX(w[37])); + 
SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 52, w[52]); + w[53] = ADD4_32(WSIGMA1_AVX(w[51]), w[37], w[46], WSIGMA0_AVX(w[38])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 53, w[53]); + w[54] = ADD4_32(WSIGMA1_AVX(w[52]), w[38], w[47], WSIGMA0_AVX(w[39])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 54, w[54]); + w[55] = ADD4_32(WSIGMA1_AVX(w[53]), w[39], w[48], WSIGMA0_AVX(w[40])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 55, w[55]); + w[56] = ADD4_32(WSIGMA1_AVX(w[54]), w[40], w[49], WSIGMA0_AVX(w[41])); + SHA256ROUND_AVX(s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], 56, w[56]); + w[57] = ADD4_32(WSIGMA1_AVX(w[55]), w[41], w[50], WSIGMA0_AVX(w[42])); + SHA256ROUND_AVX(s[7], s[0], s[1], s[2], s[3], s[4], s[5], s[6], 57, w[57]); + w[58] = ADD4_32(WSIGMA1_AVX(w[56]), w[42], w[51], WSIGMA0_AVX(w[43])); + SHA256ROUND_AVX(s[6], s[7], s[0], s[1], s[2], s[3], s[4], s[5], 58, w[58]); + w[59] = ADD4_32(WSIGMA1_AVX(w[57]), w[43], w[52], WSIGMA0_AVX(w[44])); + SHA256ROUND_AVX(s[5], s[6], s[7], s[0], s[1], s[2], s[3], s[4], 59, w[59]); + w[60] = ADD4_32(WSIGMA1_AVX(w[58]), w[44], w[53], WSIGMA0_AVX(w[45])); + SHA256ROUND_AVX(s[4], s[5], s[6], s[7], s[0], s[1], s[2], s[3], 60, w[60]); + w[61] = ADD4_32(WSIGMA1_AVX(w[59]), w[45], w[54], WSIGMA0_AVX(w[46])); + SHA256ROUND_AVX(s[3], s[4], s[5], s[6], s[7], s[0], s[1], s[2], 61, w[61]); + w[62] = ADD4_32(WSIGMA1_AVX(w[60]), w[46], w[55], WSIGMA0_AVX(w[47])); + SHA256ROUND_AVX(s[2], s[3], s[4], s[5], s[6], s[7], s[0], s[1], 62, w[62]); + w[63] = ADD4_32(WSIGMA1_AVX(w[61]), w[47], w[56], WSIGMA0_AVX(w[48])); + SHA256ROUND_AVX(s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[0], 63, w[63]); + + // Feed Forward + ctx->s[0] = ADD32(s[0], ctx->s[0]); + ctx->s[1] = ADD32(s[1], ctx->s[1]); + ctx->s[2] = ADD32(s[2], ctx->s[2]); + ctx->s[3] = ADD32(s[3], ctx->s[3]); + ctx->s[4] = ADD32(s[4], ctx->s[4]); + ctx->s[5] = ADD32(s[5], ctx->s[5]); + ctx->s[6] = 
ADD32(s[6], ctx->s[6]); + ctx->s[7] = ADD32(s[7], ctx->s[7]); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256avx.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256avx.h new file mode 100644 index 00000000..641c1e74 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256avx.h @@ -0,0 +1,103 @@ +#ifndef SHA256AVX_H +#define SHA256AVX_H + +#include +#include + +static const unsigned int RC[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define u32 uint32_t +#define u256 __m256i + +#define XOR _mm256_xor_si256 +#define OR _mm256_or_si256 +#define AND _mm256_and_si256 +#define ADD32 _mm256_add_epi32 +#define NOT(x) _mm256_xor_si256(x, _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)) + +#define LOAD(src) _mm256_loadu_si256((__m256i *)(src)) +#define STORE(dest,src) _mm256_storeu_si256((__m256i *)(dest),src) + +#define BYTESWAP(x) _mm256_shuffle_epi8(x, _mm256_set_epi8(0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3,0xc,0xd,0xe,0xf,0x8,0x9,0xa,0xb,0x4,0x5,0x6,0x7,0x0,0x1,0x2,0x3)) + +#define SHIFTR32(x, y) _mm256_srli_epi32(x, y) +#define SHIFTL32(x, y) _mm256_slli_epi32(x, y) + +#define ROTR32(x, y) OR(SHIFTR32(x, y), SHIFTL32(x, 32 - (y))) 
+#define ROTL32(x, y) OR(SHIFTL32(x, y), SHIFTR32(x, 32 - (y))) + +#define XOR3(a, b, c) XOR(XOR(a, b), c) + +#define ADD3_32(a, b, c) ADD32(ADD32(a, b), c) +#define ADD4_32(a, b, c, d) ADD32(ADD32(ADD32(a, b), c), d) +#define ADD5_32(a, b, c, d, e) ADD32(ADD32(ADD32(ADD32(a, b), c), d), e) + +#define MAJ_AVX(a, b, c) XOR3(AND(a, b), AND(a, c), AND(b, c)) +#define CH_AVX(a, b, c) XOR(AND(a, b), AND(NOT(a), c)) + +#define SIGMA1_AVX(x) XOR3(ROTR32(x, 6), ROTR32(x, 11), ROTR32(x, 25)) +#define SIGMA0_AVX(x) XOR3(ROTR32(x, 2), ROTR32(x, 13), ROTR32(x, 22)) + +#define WSIGMA1_AVX(x) XOR3(ROTR32(x, 17), ROTR32(x, 19), SHIFTR32(x, 10)) +#define WSIGMA0_AVX(x) XOR3(ROTR32(x, 7), ROTR32(x, 18), SHIFTR32(x, 3)) + +#define SHA256ROUND_AVX(a, b, c, d, e, f, g, h, rc, w) \ + T0 = ADD5_32(h, SIGMA1_AVX(e), CH_AVX(e, f, g), _mm256_set1_epi32((int)RC[rc]), w); \ + (d) = ADD32(d, T0); \ + T1 = ADD32(SIGMA0_AVX(a), MAJ_AVX(a, b, c)); \ + (h) = ADD32(T0, T1); + +typedef struct SHA256state { + u256 s[8]; + uint8_t msgblocks[8 * 64]; + unsigned int datalen; + uint64_t msglen; +} sha256ctxx8; + + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_transpose(u256 s[8]); +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_init_frombytes_x8(sha256ctxx8 *ctx, const uint8_t *s, unsigned long long msglen); +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_init8x(sha256ctxx8 *ctx); +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx, + const unsigned char *d0, + const unsigned char *d1, + const unsigned char *d2, + const unsigned char *d3, + const unsigned char *d4, + const unsigned char *d5, + const unsigned char *d6, + const unsigned char *d7, + unsigned long long len); +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_final8x(sha256ctxx8 *ctx, + unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7); + +void 
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_transform8x(sha256ctxx8 *ctx, const unsigned char *data); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_clone_statex8(sha256ctxx8 *outctx, const sha256ctxx8 *inctx); + + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256x8.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256x8.c new file mode 100644 index 00000000..fa193378 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256x8.c @@ -0,0 +1,128 @@ +#include + +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_init8x(ctx); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_update8x(ctx, block, block, block, block, block, block, block, block, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES); + +} + +/* This provides a wrapper around the internals of 8x parallel SHA256 */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256x8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned long long inlen) { + sha256ctxx8 ctx; + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_init8x(&ctx); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_update8x(&ctx, in0, in1, 
in2, in3, in4, in5, in6, in7, inlen);
+    PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_final8x(&ctx, out0, out1, out2, out3, out4, out5, out6, out7);
+}
+
+/**
+ * Eight-lane MGF1: lane k hashes in_k followed by a 4-byte block counter and writes outlen bytes at outx8 + k * outlen.
+ * inlen is NOT checked: inbufx8 holds at most N + SHA256_ADDR_BYTES input bytes (plus the 4 counter bytes) per lane.
+ * Typically 'in' is merely a seed. Outputs outlen number of bytes per lane.
+ */
+void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_mgf1x8(
+    unsigned char *outx8,
+    unsigned long outlen,
+    const unsigned char *in0,
+    const unsigned char *in1,
+    const unsigned char *in2,
+    const unsigned char *in3,
+    const unsigned char *in4,
+    const unsigned char *in5,
+    const unsigned char *in6,
+    const unsigned char *in7,
+    unsigned long inlen) {
+    unsigned char inbufx8[8 * ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES) + 4)];
+    unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES];
+    unsigned long i;
+    unsigned int j;
+
+    memcpy(inbufx8 + 0 * (inlen + 4), in0, inlen);
+    memcpy(inbufx8 + 1 * (inlen + 4), in1, inlen);
+    memcpy(inbufx8 + 2 * (inlen + 4), in2, inlen);
+    memcpy(inbufx8 + 3 * (inlen + 4), in3, inlen);
+    memcpy(inbufx8 + 4 * (inlen + 4), in4, inlen);
+    memcpy(inbufx8 + 5 * (inlen + 4), in5, inlen);
+    memcpy(inbufx8 + 6 * (inlen + 4), in6, inlen);
+    memcpy(inbufx8 + 7 * (inlen + 4), in7, inlen);
+
+    /* While we can fit in at least another full block of SHA256 output.. */
+    for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES <= outlen; i++) {
+        for (j = 0; j < 8; j++) {
+            PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i);
+        }
+
+        PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256x8(outx8 + 0 * outlen,
+                outx8 + 1 * outlen,
+                outx8 + 2 * outlen,
+                outx8 + 3 * outlen,
+                outx8 + 4 * outlen,
+                outx8 + 5 * outlen,
+                outx8 + 6 * outlen,
+                outx8 + 7 * outlen,
+                inbufx8 + 0 * (inlen + 4),
+                inbufx8 + 1 * (inlen + 4),
+                inbufx8 + 2 * (inlen + 4),
+                inbufx8 + 3 * (inlen + 4),
+                inbufx8 + 4 * (inlen + 4),
+                inbufx8 + 5 * (inlen + 4),
+                inbufx8 + 6 * (inlen + 4),
+                inbufx8 + 7 * (inlen + 4), inlen + 4);
+        outx8 += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES;
+    }
+    if (outlen == i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES) { return; } /* no partial block left: skip the extra 8-way hash, as the scalar mgf1 does */
+    for (j = 0; j < 8; j++) { /* Until we cannot anymore, and we fill the remainder. */
+        PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(inbufx8 + inlen + j * (inlen + 4), 4, i);
+    }
+    PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256x8(outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+            inbufx8 + 0 * (inlen + 4),
+            inbufx8 + 1 * (inlen + 4),
+            inbufx8 + 2 * (inlen + 4),
+            inbufx8 + 3 * (inlen + 4),
+            inbufx8 + 4 * (inlen + 4),
+            inbufx8 + 5 * (inlen + 4),
+            inbufx8 + 6 * (inlen + 4),
+            inbufx8 + 7 * (inlen + 4), inlen + 4);
+
+    for (j = 0; j < 8; j++) {
+        memcpy(outx8 + j * outlen,
+               outbufx8 + j * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
+               outlen - i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES);
+    }
+}
diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256x8.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256x8.h
new file mode 100644
index 00000000..fe473ab4
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sha256x8.h
@@ -0,0 +1,44 @@
+#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256X8_H
+#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256X8_H
+
+#include "sha256avx.h"
+
+#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_BLOCK_BYTES 64
+#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N */
+
+/* This provides a wrapper around the internals of 8x parallel SHA256 */
+void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256x8(unsigned char *out0,
+        unsigned char *out1,
+        unsigned char *out2,
+        unsigned char *out3,
+        unsigned char *out4,
+        unsigned char *out5,
+        unsigned char *out6,
+        unsigned char *out7,
+        const unsigned char *in0,
+        const unsigned char *in1,
+        const unsigned char *in2,
+        const unsigned char *in3,
+        const unsigned char *in4,
+        const unsigned char *in5,
+        const unsigned char *in6,
+        const unsigned char *in7, unsigned long long inlen);
+
+/**
+ * Note that inlen should be sufficiently small that it still allows for
+ * an array to be allocated on the stack. Typically 'in' is merely a seed.
+ * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_mgf1x8(unsigned char *outx8, unsigned long outlen, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, + unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_seed_statex8(sha256ctxx8 *ctx, const unsigned char *pub_seed); +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sign.c new file mode 100644 index 00000000..b2acc286 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || 
PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash.h new file mode 100644 index 00000000..241bfc1e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash_sha256_simple.c new file mode 100644 index 
00000000..2b84447f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, &hash_state_seeded->x1); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state 
*hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash_sha256_simplex8.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash_sha256_simplex8.c new file mode 100644 index 00000000..e50719c0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thash_sha256_simplex8.c @@ -0,0 +1,129 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "sha256.h" +#include "sha256avx.h" +#include "sha256x8.h" +#include "thashx8.h" +#include "utils.h" + +/** + * 8-way parallel version of thash; takes 8x as much input and output + */ +static void thashx8(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned 
char *out3, + unsigned char *out4, + unsigned char *out5, + unsigned char *out6, + unsigned char *out7, + const unsigned char *in0, + const unsigned char *in1, + const unsigned char *in2, + const unsigned char *in3, + const unsigned char *in4, + const unsigned char *in5, + const unsigned char *in6, + const unsigned char *in7, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + uint8_t *bufx8, + const hash_state *state_seeded) { + unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES]; + unsigned int i; + sha256ctxx8 ctx; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + addrx8 + i * 8); + } + + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 0 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 1 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 2 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 3 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + 
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 4 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 5 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 6 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + + 7 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_update8x(&ctx, + bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * 
PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N), + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_sha256_final8x(&ctx, + outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, + outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES); + + memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); +} + +#define thashx8_variant_impl(name, size) \ + void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \ + unsigned char 
*out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const unsigned int inblocks = (size); \ + uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N)]; \ + thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ + in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ + pub_seed, addrx8, bufx8, state_seeded); \ + } + +thashx8_variant_impl(1, 1) +thashx8_variant_impl(2, 2) +thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN) +thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_TREES) + +#undef thashx8_variant_impl diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thashx8.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thashx8.h new file mode 100644 index 00000000..62d88c86 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/thashx8.h @@ -0,0 +1,39 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_THASHX8_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_THASHX8_H + +#include + +#include "hash_state.h" +#include "sha256avx.h" + + +#define thashx8_variant(name) \ + void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thashx8_##name( \ + unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + unsigned char *out4, \ + unsigned char *out5, \ + unsigned char *out6, \ + unsigned char *out7, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *in4, \ + const 
unsigned char *in5, \ + const unsigned char *in6, \ + const unsigned char *in7, \ + const unsigned char *pub_seed, \ + uint32_t addrx8[8*8], \ + const hash_state *state_seeded) + + +thashx8_variant(1); +thashx8_variant(2); +thashx8_variant(WOTS_LEN); +thashx8_variant(FORS_TREES); + +#undef thashx8_variant +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utils.c new file mode 100644 index 00000000..7672d40a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + 
unsigned int heights[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utils.h new file mode 100644 index 00000000..1b59e1f7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utilsx8.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utilsx8.c new file mode 100644 index 00000000..7234c250 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utilsx8.c @@ -0,0 +1,172 @@ +#include + +#include "address.h" +#include "params.h" +#include "thashx8.h" +#include "utils.h" + +#include "utilsx8.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). 
+ * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void treehashx8(unsigned char *rootx8, unsigned char *auth_pathx8, + unsigned char *stackx8, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + uint32_t tree_height, + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded) { + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + unsigned int j; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. 
*/ + gen_leafx8(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sk_seed, pub_seed, + idx + idx_offset[0], + idx + idx_offset[1], + idx + idx_offset[2], + idx + idx_offset[3], + idx + idx_offset[4], + idx + idx_offset[5], + idx + idx_offset[6], + idx + idx_offset[7], + tree_addrx8, + state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if ((leaf_idx[j] ^ 0x1) == idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_height(tree_addrx8 + j * 8, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_tree_index(tree_addrx8 + j * 8, + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); + } + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thashx8_2(stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 0 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 1 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 2 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 3 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 4 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 
2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 5 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 6 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + 7 * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + pub_seed, tree_addrx8, state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + for (j = 0; j < 8; j++) { + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_pathx8 + j * tree_height * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N + (offset - 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + } + } + } + + for (j = 0; j < 8; j++) { + memcpy(rootx8 + j * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, stackx8 + j * (tree_height + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } +} + +/* The wrappers below ensure we use fixed-size buffers on the stack (no VLAs) */ + + +#define treehashx8_variant(name, size) \ + void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehashx8_##name( \ + unsigned char *rootx8, unsigned char *auth_pathx8, \ + const unsigned char *sk_seed, const unsigned char *pub_seed, \ + const uint32_t leaf_idx[8], uint32_t idx_offset[8], \ + void (*gen_leafx8)( \ + unsigned char* /* leaf0 */, \ + unsigned char* /* leaf1 */, \ + unsigned char* /* leaf2 */, \ + unsigned char* /* leaf3 */, \ + unsigned char* /* leaf4 */, \ + unsigned char* /* leaf5 */, \ + unsigned char* /* leaf6 */, \ + unsigned char* /* leaf7 */, \ + const unsigned char* /* sk_seed */, \ + const unsigned char* /* pub_seed */, \ + uint32_t
/* addr_idx0 */, \ + uint32_t /* addr_idx1 */, \ + uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, \ + uint32_t /* addr_idx4 */, \ + uint32_t /* addr_idx5 */, \ + uint32_t /* addr_idx6 */, \ + uint32_t /* addr_idx7 */, \ + const uint32_t[8] /* tree_addr */, \ + const hash_state* /* state_seeded */), \ + uint32_t tree_addrx8[8*8], \ + const hash_state *state_seeded) \ + { \ + const uint32_t tree_height = (size); \ + unsigned char stackx8[8*((size) + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; \ + unsigned int heights[(size) + 1]; \ + treehashx8(rootx8, auth_pathx8, stackx8, heights, sk_seed, pub_seed, \ + leaf_idx, idx_offset, tree_height, gen_leafx8, tree_addrx8, state_seeded); \ + } + +treehashx8_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_FORS_HEIGHT) + +#undef treehashx8_variant diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utilsx8.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utilsx8.h new file mode 100644 index 00000000..6a1e0b06 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/utilsx8.h @@ -0,0 +1,46 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_UTILSX8_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_UTILSX8_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_treehashx8_FORS_HEIGHT( + unsigned char *rootx8, unsigned char *auth_pathx8, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t leaf_idx[8], uint32_t idx_offset[8], + void (*gen_leafx8)( + unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + unsigned char * /* leaf4 */, + unsigned char * /* leaf5 */, + unsigned char * /* leaf6 */, + unsigned char * /* leaf7 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + uint32_t /* addr_idx4 */, + uint32_t /* addr_idx5 */, + uint32_t /* addr_idx6 */, + uint32_t /* addr_idx7 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx8[8 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/wots.c new file mode 100644 index 00000000..2e6f1414 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx8.h" +#include "params.h" +#include "thash.h" +#include "thashx8.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 8-way parallel version of wots_gen_sk; expects 8x as much space in sk + */ +static void wots_gen_skx8(unsigned char *skx8, const unsigned char *sk_seed, + uint32_t wots_addrx8[8 * 8]) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_hash_addr(wots_addrx8 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_prf_addrx8(skx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + skx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + sk_seed, wots_addrx8); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 8-way parallel version of gen_chain; expects 8x as much space in out, and + * 8x as much space in inx8. Assumes start and step identical across chains. + */ +static void gen_chainx8(unsigned char *outx8, const unsigned char *inx8, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx8[8 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx8 with the value at position 'start'. */ + memcpy(outx8, inx8, 8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_hash_addr(addrx8 + j * 8, i); + } + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_thashx8_1(outx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 0 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 4 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 5 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 6 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + outx8 + 7 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + pub_seed, addrx8, state_seeded); 
+ } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. 
Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx8[8 * 8]; + unsigned char pkbuf[8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N]; + + for (j = 0; j < 8; j++) { + memcpy(addrx8 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 8 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN + 7) & ~0x7); i += 8) { + for (j = 0; j < 8; j++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_chain_addr(addrx8 + j * 8, i + j); + } + wots_gen_skx8(pkbuf, sk_seed, addrx8); + gen_chainx8(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx8, state_seeded); + for (j = 0; j < 8; j++) { + if (i + j < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N); + } + } + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/wots.h new file mode 100644 index 00000000..4978c152 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include
"params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..cf1372b2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-sha256-256s-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_sha256.obj thash_sha256_simple.obj sha256.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/address.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/address.c new file mode 100644 index 00000000..67e83a3d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/address.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/address.h new file mode 100644 index 00000000..269dc1f7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/api.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/api.h new file mode 100644 index 00000000..d9560eb2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/fors.c new file mode 100644 index 00000000..4812be3c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. 
+ * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << 
PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/fors.h new file mode 100644 index 00000000..105ba4a9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash.h new file mode 100644 index 00000000..797ec37f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash_sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash_sha256.c new file mode 100644 index 00000000..2948d690 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash_sha256.c @@ -0,0 +1,162 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" 
+#include "utils.h" + +#include "sha2.h" +#include "sha256.h" + +/* For SHA256, initialization only passes pub_seed to seed_state() to set up + hash_state_seeded; sk_seed is not used here. */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_seed_state(hash_state_seeded, pub_seed); + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* Clean up hash state */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded) { + sha256_inc_ctx_release(hash_state_seeded); +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES]; + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compress_address(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, addr); + + sha256(outbuf, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed as a key + * for HMAC, and an optional randomization value prefixed to the message. + * This requires m to have at least PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N space + * available in front of the pointer, i.e. before the message to use for the + * prefix. 
This is necessary to prevent having to move the message around (and + * allocate memory for it). + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx state; + int i; + + /* This implements HMAC-SHA256 */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; i++) { + buf[i] = 0x36 ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, 0x36, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + sha256_inc_init(&state); + sha256_inc_blocks(&state, buf, 1); + + memcpy(buf, optrand, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + /* If optrand + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + mlen < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, m, mlen); + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, + buf, mlen + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, m, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + sha256_inc_blocks(&state, buf, 1); + + m += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + mlen -= PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + sha256_inc_finalize(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES, &state, m, mlen); + } + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; i++) { 
+ buf[i] = 0x5c ^ sk_prf[i]; + } + memset(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, 0x5c, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + sha256(buf, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + memcpy(R, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char seed[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES + 4]; + + /* Round to nearest multiple of PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS (((PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + 
PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - 1) & \ + -PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) / PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) + unsigned char inbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + sha256ctx state; + + sha256_inc_init(&state); + + memcpy(inbuf, R, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, pk, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES); + + /* If R + pk + message cannot fill up an entire block */ + if (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES + mlen < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES) { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES, m, mlen); + sha256_inc_finalize(seed, &state, inbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES + mlen); + } + /* Otherwise first fill a block, so that finalize only uses the message */ + else { + memcpy(inbuf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES, m, + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES); + sha256_inc_blocks(&state, inbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS); + + m += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES; + mlen -= PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_INBLOCKS * 
PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES; + sha256_inc_finalize(seed, &state, m, mlen); + } + + /* By doing this in two steps, we prevent hashing the message twice; + otherwise each iteration in MGF1 would hash the message again. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_mgf1(bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_DGST_BYTES, seed, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_bytes_to_ull(bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash_state.h new file mode 100644 index 00000000..19fc335e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/hash_state.h @@ -0,0 +1,26 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. 
To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + */ + +#include "sha2.h" +#define hash_state sha256ctx + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/params.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/params.h new file mode 100644 index 00000000..c36552bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES 22 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sha256.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sha256.c new file mode 100644 index 00000000..f9d103ec --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sha256.c @@ -0,0 +1,71 @@ +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. Bernstein */ + +#include +#include +#include + +#include "sha2.h" +#include "sha256.h" +#include "utils.h" + +/* + * Compresses an address to a 22-byte sequence. + * This reduces the number of required SHA256 compression calls, as the last + * block of input is padded with at least 65 bits. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out, 1, addr[0]); /* drop 3 bytes of the layer field */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out + 1, 4, addr[2]); /* drop the highest tree address word */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out + 5, 4, addr[3]); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out + 9, 1, addr[4]); /* drop 3 bytes of the type field */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out + 10, 4, addr[5]); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out + 14, 4, addr[6]); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(out + 18, 4, addr[7]); +} + +/** + * Requires 'input_plus_four_bytes' to have 'inlen' + 4 bytes, so that the last + * four bytes can be used for the counter. Typically 'input' is merely a seed. + * Outputs outlen number of bytes + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char *input_plus_four_bytes, unsigned long inlen) { + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + unsigned long i; + + /* While we can fit in at least another full block of SHA256 output.. */ + for (i = 0; (i + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES <= outlen; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(out, input_plus_four_bytes, inlen + 4); + out += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES; + } + /* Until we cannot anymore, and we fill the remainder. 
*/ + if (outlen > i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes(input_plus_four_bytes + inlen, 4, i); + sha256(outbuf, input_plus_four_bytes, inlen + 4); + memcpy(out, outbuf, outlen - i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES); + } +} + + +/** + * Absorb the constant pub_seed using one round of the compression function + * This initializes hash_state_seeded, which can then be reused in thash + **/ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed) { + uint8_t block[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES]; + size_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; ++i) { + block[i] = pub_seed[i]; + } + for (i = PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES; ++i) { + block[i] = 0; + } + + sha256_inc_init(hash_state_seeded); + sha256_inc_blocks(hash_state_seeded, block, 1); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sha256.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sha256.h new file mode 100644 index 00000000..e6f8094a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sha256.h @@ -0,0 +1,21 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_H + +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_BLOCK_BYTES 64 +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES 32 /* This does not necessarily equal PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N */ +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES 22 + +#include +#include + +#include "sha2.h" + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compress_address(unsigned char *out, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_mgf1( + unsigned char *out, unsigned long outlen, + unsigned char 
*input_plus_four_bytes, unsigned long inlen); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_seed_state(sha256ctx *hash_state_seeded, const unsigned char *pub_seed); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sign.c new file mode 100644 index 00000000..be0d36c3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. + */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t 
PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. 
*/ + memcpy(sk, seed, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ + randombytes(optrand, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N is a result of the hash domain separator. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/thash.h new file mode 100644 index 00000000..d9e8aea9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/thash_sha256_simple.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/thash_sha256_simple.c new file mode 100644 
index 00000000..44023c41 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/thash_sha256_simple.c @@ -0,0 +1,75 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "sha2.h" +#include "sha256.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char outbuf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_OUTPUT_BYTES]; + sha256ctx sha2_state; + + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + + /* Retrieve precomputed state containing pub_seed */ + sha256_inc_ctx_clone(&sha2_state, hash_state_seeded); + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compress_address(buf, addr); + memcpy(buf + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + sha256_inc_finalize(outbuf, &sha2_state, buf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + memcpy(out, outbuf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const 
sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const sha256ctx *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_SHA256_ADDR_BYTES + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_TREES, pub_seed, addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/utils.c new file mode 100644 index 00000000..faf91898 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* 
leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/utils.h new file mode 100644 index 00000000..a59dd7fa --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/wots.c new 
file mode 100644 index 00000000..e8bda4b3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. 
+ * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w) and writes it to csum_base_w as WOTS_LEN2 base-w digits. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 makes the shift 0 (not 8) when LEN2 * LOGW is a multiple of 8. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/wots.h new file mode 100644 index 00000000..ed817600 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-sha256-256s-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHA256256SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/META.yml b/crypto_sign/sphincs/sphincs-shake256-128f-robust/META.yml new file mode 100644 index 00000000..c68ce4e6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 16976 +testvectors-sha256: eea7f59958e732c15110d0d06e3c23005d73df2b15a1e7b4ebc0ca2dcf162bb5 +nistkat-sha256: e7789df37278d1e147996bd9bf4cda55d5ec5cbe921e64b0766927af4b02decd +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..0c125655 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128f-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj hash_shake256x4.obj thash_shake256_robustx4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/address.c new file mode 100644 index 00000000..68ccca4f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/address.h new file mode 100644 index 00000000..f46651ff --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h new file mode 100644 index 00000000..59085730 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fips202x4.c new file mode 100644 index 00000000..ed4fe624 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_xor_si256(s[i], s[i]); + } + + /* absorb 4 message of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i 
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_xor_si256(s[i], s[i]); + } + + /* absorb 4 message of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fips202x4.h new file mode 100644 index 00000000..7d015781 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + 
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fors.c new file mode 100644 index 00000000..668f87de --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fors.h new file mode 100644 index 00000000..103d2372 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash.h new file mode 100644 index 00000000..66bee453 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_shake256.c new file mode 100644 index 00000000..fe3aa3bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_shake256x4.c new file mode 100644 index 00000000..b8495584 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_state.h new file mode 100644 index 00000000..675bf23b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hashx4.h new file mode 100644 index 00000000..8cb4322e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/params.h new file mode 100644 index 00000000..5cc9a125 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FULL_HEIGHT 60 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N / PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/sign.c new file mode 100644 index 00000000..ee731094 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */
+int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_signature(
+    uint8_t *sig, size_t *siglen,
+    const uint8_t *m, size_t mlen, const uint8_t *sk) {
+    // guarantee 16-byte alignment of sk: the __m128 array member (not a pointer) sets the union's alignment
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16];
+        uint8_t sk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES];
+    } aligned_sk;
+    memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_SECRETKEYBYTES);
+    sk = aligned_sk.sk;
+
+    // guarantee 16-byte alignment of sig; the signature is built here and copied to the caller's buffer at the end
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES / 16];
+        uint8_t sig[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES];
+    } aligned_sig;
+    uint8_t *orig_sig = sig;
+    sig = (uint8_t *)aligned_sig.sig;
+
+    const unsigned char *sk_seed = sk;
+    const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+    const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+    const unsigned char *pub_seed = pk;
+
+    unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N];
+    unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_MSG_BYTES];
+    unsigned char root[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N];
+    uint32_t i;
+    uint64_t tree;
+    uint32_t idx_leaf;
+    uint32_t wots_addr[8] = {0};
+    uint32_t tree_addr[8] = {0};
+
+    hash_state hash_state_seeded;
+
+    /* This hook allows the hash function instantiation to do whatever
+       preparation or computation it needs, based on the public seed. */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_initialize_hash_function(
+        &hash_state_seeded,
+        pub_seed, sk_seed);
+
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(
+        wots_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTS);
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(
+        tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE);
+
+    /* Optionally, signing can be made non-deterministic using optrand.
+       This can help counter side-channel attacks that would benefit from
+       getting a large number of traces when the signer uses the same nodes. */
+    randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N);
+    /* Compute the digest randomization value. */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_gen_message_random(
+        sig, sk_prf, optrand, m, mlen, &hash_state_seeded);
+
+    /* Derive the message digest and leaf index from R, PK and M. */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_hash_message(
+        mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_addr(wots_addr, tree);
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr(
+        wots_addr, idx_leaf);
+
+    /* Sign the message hash using FORS. */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_fors_sign(
+        sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_BYTES;
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D; i++) {
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_layer_addr(tree_addr, i);
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_addr(tree_addr, tree);
+
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_subtree_addr(
+            wots_addr, tree_addr);
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr(
+            wots_addr, idx_leaf);
+
+        /* Compute a WOTS signature. */
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_sign(
+            sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES;
+
+        /* Compute the authentication path for the used WOTS leaf. */
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash_TREE_HEIGHT(
+            root, sig, sk_seed, pub_seed, idx_leaf, 0,
+            wots_gen_leaf, tree_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+
+        /* Update the indices for the next layer. */
+        idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT) - 1));
+        tree = tree >> PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT;
+    }
+
+    memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES);
+    *siglen = PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES;
+
+    return 0;
+}
+
+/**
+ * Verifies a detached signature and message under a given public key.
+ */
+int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_verify(
+    const uint8_t *sig, size_t siglen,
+    const uint8_t *m, size_t mlen, const uint8_t *pk) {
+    // guarantee 16-byte alignment of pk: the __m128 array member (not a pointer) sets the union's alignment
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16];
+        uint8_t pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES];
+    } aligned_pk;
+    memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES);
+    pk = aligned_pk.pk;
+
+    const unsigned char *pub_seed = pk;
+    const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+    unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_MSG_BYTES];
+    unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES];
+    unsigned char root[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N];
+    unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N];
+    unsigned int i;
+    uint64_t tree;
+    uint32_t idx_leaf;
+    uint32_t wots_addr[8] = {0};
+    uint32_t tree_addr[8] = {0};
+    uint32_t wots_pk_addr[8] = {0};
+
+    hash_state hash_state_seeded;
+
+    if (siglen != PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES) {
+        return -1;
+    }
+
+    /* This hook allows the hash function instantiation to do whatever
+       preparation or computation it needs, based on the public seed. */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_initialize_hash_function(
+        &hash_state_seeded,
+        pub_seed, NULL);
+
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(
+        wots_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTS);
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(
+        tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE);
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_type(
+        wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_WOTSPK);
+
+    /* Derive the message digest and leaf index from R || PK || M. */
+    /* The additional PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N is a result of the hash domain separator. */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_hash_message(
+        mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+
+    /* Layer correctly defaults to 0, so no need to set_layer_addr */
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_addr(wots_addr, tree);
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr(
+        wots_addr, idx_leaf);
+
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_fors_pk_from_sig(
+        root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_BYTES;
+
+    /* For each subtree.. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_D; i++) {
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_layer_addr(tree_addr, i);
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_addr(tree_addr, tree);
+
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_subtree_addr(
+            wots_addr, tree_addr);
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_keypair_addr(
+            wots_addr, idx_leaf);
+
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_copy_keypair_addr(
+            wots_pk_addr, wots_addr);
+
+        /* The WOTS public key is only correct if the signature was correct. */
+        /* Initially, root is the FORS pk, but on subsequent iterations it is
+           the root of the subtree below the currently processed subtree. */
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_pk_from_sig(
+            wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_BYTES;
+
+        /* Compute the leaf node using the WOTS public key. */
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_WOTS_LEN(
+            leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded);
+
+        /* Compute the root node of this subtree. */
+        PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_compute_root(
+            root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT,
+            pub_seed, tree_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N;
+
+        /* Update the indices for the next layer. */
+        idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT) - 1));
+        tree = tree >> PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT;
+    }
+
+    /* Check if the root node equals the root node in the public key. */
+    if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Returns an array containing the signature followed by the message.
+ */
+int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign(
+    uint8_t *sm, size_t *smlen,
+    const uint8_t *m, size_t mlen, const uint8_t *sk) {
+    size_t siglen;
+
+    PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_signature(
+        sm, &siglen, m, mlen, sk);
+
+    memmove(sm + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES, m, mlen);
+    *smlen = siglen + mlen;
+
+    return 0;
+}
+
+/**
+ * Verifies a given signature-message pair under a given public key.
+ */
+int PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_open(
+    uint8_t *m, size_t *mlen,
+    const uint8_t *sm, size_t smlen, const uint8_t *pk) {
+
+    // guarantee 16-byte alignment of pk: the __m128 array member (not a pointer) sets the union's alignment
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16];
+        uint8_t pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES];
+    } aligned_pk;
+    memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES);
+    pk = aligned_pk.pk;
+
+
+    /* The API caller does not necessarily know what size a signature should be
+       but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES. */
+    if (smlen < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES) {
+        memset(m, 0, smlen);
+        *mlen = 0;
+        return -1;
+    }
+
+    *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES;
+
+    if (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_crypto_sign_verify(
+                sm, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES, *mlen, pk)) {
+        memset(m, 0, smlen);
+        *mlen = 0;
+        return -1;
+    }
+
+    /* If verification was successful, move the message to the right place. */
+    memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_BYTES, *mlen);
+
+    return 0;
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash.h
new file mode 100644
index 00000000..8b6ea800
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash.h
@@ -0,0 +1,28 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_THASH_H
+#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_THASH_H
+
+#include "hash_state.h"
+
+#include <stdint.h>
+
+void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_1(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_2(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_WOTS_LEN(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_FORS_TREES(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+#endif
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash_shake256_robust.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash_shake256_robust.c
new file mode 100644
index 00000000..351159df
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash_shake256_robust.c
@@ -0,0 +1,81 @@
+#include <stdint.h>
+#include <string.h>
+
+#include "address.h"
+#include "params.h"
+#include "thash.h"
+
+#include "fips202.h"
+
+/**
+ * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N bytes.
+ */ +static void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash_shake256_robustx4.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash_shake256_robustx4.c new file mode 100644 index 00000000..61dc0079 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thash_shake256_robustx4.c @@ -0,0 +1,66 @@ +#include +#include + +#include "thashx4.h" +#include "address.h" +#include "params.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thash_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thashx4_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) \ + { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char bitmask0[(inblocks) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char bitmask1[(inblocks) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char bitmask2[(inblocks) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned char bitmask3[(inblocks) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned int i; \ 
+ \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(buf0 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, addrx4 + 0*8); \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, addrx4 + 1*8); \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, addrx4 + 2*8); \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, addrx4 + 3*8); \ + \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES); \ + \ + for (i = 0; i < (inblocks) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N; i++) { \ + buf0[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \ + buf1[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \ + buf2[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \ + buf3[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \ + } \ + \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_shake256x4( \ + out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + \ + /* avoid unused parameter 
warning */ \ + (void)state_seeded; \ + } + + +thash_variant(1, 1) +thash_variant(2, 2) +thash_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN) +thash_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thashx4.h new file mode 100644 index 00000000..b48a62b5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utils.c new file mode 100644 index 00000000..9bbf5eec --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + 
unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utils.h new file mode 100644 index 00000000..b657e720 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utilsx4.c 
b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utilsx4.c new file mode 100644 index 00000000..b84caebf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utilsx4.h new file mode 100644 index 00000000..502376f7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/wots.c new file mode 100644 index 00000000..71089982 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the four chain values at position 'start' (4 * N bytes, one N-byte block per chain). */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function, stopping early once position WOTS_W is reached; all four chains use the same hash address i. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as out_len integers in base w, taking the most significant bits of each byte first. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum: the sum over all WOTS_LEN1 message digits of (WOTS_W - 1 - digit). */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. NOTE(review): the shift count below is 8, not 0, when WOTS_LEN2 * WOTS_LOGW is a multiple of 8 -- confirm no supported parameter set hits that case. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths: WOTS_LEN1 message digits followed by WOTS_LEN2 checksum digits. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N]; + + for (j = 0; j < 4; j++) { /* replicate the caller's 8-word address once per parallel chain */ + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible; surplus results are simply not copied to pk. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N); + } + } + } + + // state_seeded is already passed to the x4 helpers above, so this cast is a harmless no-op. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // state_seeded is already used by the calls above, so this cast is a harmless no-op + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key by advancing chain i from position lengths[i] to position WOTS_W - 1. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // state_seeded is already used by the calls above, so this cast is a harmless no-op + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/wots.h new file mode 100644 index 00000000..87b503cd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..c08343c8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128f-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/address.c new file mode 100644 index 00000000..dd5d233e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { /* serialise each of the 8 address words into 4 bytes, 32 bytes in total */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; /* word 0: layer */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; /* words 1-3: tree; 'tree' is only 64 bits wide, so the top word is always zero */ + addr[2] = (uint32_t) (tree >> 32); /* upper 32 bits of tree */ + addr[3] = (uint32_t) tree; /* lower 32 bits of tree */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; /* word 4: address type */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; /* layer */ + out[1] = in[1]; /* tree, words 1-3 */ + out[2] = in[2]; + out[3] = in[3]; /* words 4-7 of out are left unchanged */ +} + +/* These functions are used for WOTS and FORS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; /* word 5: key pair */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; /* layer */ + out[1] = in[1]; /* tree, words 1-3 */ + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; /* key pair; word 4 and words 6-7 of out are left unchanged */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; /* word 6: chain (same word as tree_height below) */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; /* word 7: hash (same word as tree_index below) */ +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; /* word 6: tree height (same word as chain above) */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; /* word 7: tree index (same word as hash above) */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/address.h new file mode 100644 index 00000000..e3e9cc0a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/api.h new file mode 100644 index 00000000..7e025487 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key.
+ */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/fors.c new file mode 100644 index 00000000..f8a3fb17 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/fors.h new file mode 100644 index 00000000..8290a4e2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash.h new file mode 100644 index 00000000..3856a469 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash_shake256.c new file mode 100644 index 00000000..f350f0b9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/params.h new file mode 100644 index 00000000..dd72b607 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FULL_HEIGHT 60 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES 30 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N / PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/sign.c new file mode 100644 index 00000000..34bc5dc3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SEEDBYTES. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/thash.h new file mode 100644 index 00000000..ef518b11 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/thash_shake256_robust.c 
b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/thash_shake256_robust.c new file mode 100644 index 00000000..27531f50 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/utils.c new file mode 100644 index 00000000..aefe7054 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/utils.h new file mode 100644 index 00000000..153481bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/wots.c new file mode 100644 index 00000000..c2ef4a70 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? 
+ +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. 
+ */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a n-byte message and the 32-byte sk_see to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/wots.h new file mode 100644 index 00000000..46a68c98 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/META.yml b/crypto_sign/sphincs/sphincs-shake256-128f-simple/META.yml new file mode 100644 index 00000000..ce797358 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 16976 +testvectors-sha256: a14cb8e4f149493fc5979e465e09ce943e8d669186ff5c7c3d11239fa869def6 +nistkat-sha256: c99700873ca6914944fcef3b649270c86c056dcd11ce6e8f22580b193a136e6f +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..dd27016b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128f-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj hash_shake256x4.obj thash_shake256_simplex4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/address.c new file mode 100644 index 00000000..41b247de --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/address.h new file mode 100644 index 00000000..44bcaeae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h new file mode 100644 index 00000000..1a6555c9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fips202x4.c new file mode 100644 index 00000000..3f1a5e0b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +/* Squeezes nblocks blocks of r bytes from each of the four lanes of the state s into h0..h3, permuting the state before every block. */ +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +/* Four-way parallel SHAKE128: absorbs the four inputs in0..in3 (each inlen bytes long) and writes outlen bytes of output for each of them to out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* Zero the state. Use setzero instead of XOR-ing s[i] with itself, which would read the still-uninitialized array. */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +/* Four-way parallel SHAKE256: absorbs the four inputs in0..in3 (each inlen bytes long) and writes outlen bytes of output for each of them to out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* Zero the state. Use setzero instead of XOR-ing s[i] with itself, which would read the still-uninitialized array. */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + /* Partial final block: squeeze one full block into scratch buffers and copy only the remaining bytes. */ + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fips202x4.h new file mode 100644 index 00000000..a68603b7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fors.c new file mode 100644 index 00000000..8a89a3df --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fors.h new file mode 100644 index 00000000..5def0258 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash.h new file mode 100644 index 00000000..0a97be63 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_shake256.c new file mode 100644 index 00000000..496aac8f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_shake256x4.c new file mode 100644 index 00000000..6f2a6373 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_state.h new file mode 100644 index 00000000..6af38645 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hashx4.h new file mode 100644 index 00000000..1842aef4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/params.h new file mode 100644 index 00000000..f105ab18 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FULL_HEIGHT 60 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/sign.c new file mode 100644 index 00000000..1b77e896 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash.h new file mode 100644 index 00000000..532ff45f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash_shake256_simple.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash_shake256_simple.c new file mode 100644 index 00000000..2812f11e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash_shake256_simplex4.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash_shake256_simplex4.c new file mode 100644 index 00000000..704c3244 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thash_shake256_simplex4.c @@ -0,0 +1,47 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thashx4.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thashx4_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thashx4_##name( \ + unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, \ + const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, \ + const unsigned char *in3, const unsigned char *pub_seed, uint32_t addrx4[4 * 8], \ + const hash_state *state_seeded) { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; \ + \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(buf0 + 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, addrx4 + 0 * 8); \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, addrx4 + 1 * 8); \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, addrx4 + 2 * 8); \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, addrx4 + 3 * 8); \ + memcpy(buf0 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES, in0, (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + memcpy(buf1 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES, in1, (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + memcpy(buf2 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES, in2, (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + memcpy(buf3 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES, in3, (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_shake256x4(out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, buf0, buf1, buf2, buf3, \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + \ + /* Avoid unused parameter warning */ \ + (void)state_seeded; \ + } + +thashx4_variant(1, 1) +thashx4_variant(2, 2) +thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN) +thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thashx4.h new file mode 100644 index 00000000..274c99c2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utils.c new file mode 100644 index 00000000..efe313ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utils.h new file mode 100644 index 00000000..3abe2cbe --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utilsx4.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utilsx4.c new file mode 100644 index 00000000..4222466c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utilsx4.h new file mode 100644 index 00000000..6518962f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/wots.c new file mode 100644 index 00000000..c3ad965b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the four chain values at position 'start' (4 * N bytes copied from inx4). */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function, never stepping past position WOTS_W; every call tags all four addresses with position i and hashes the four chains in place. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w (high bits of each byte first), writing out_len digits to output. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w): reads WOTS_LEN1 digits from msg_base_w and writes WOTS_LEN2 digits to csum_base_w. 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum: sum of (WOTS_W - 1 - digit) over the WOTS_LEN1 message digits in msg_base_w. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w, writing WOTS_LEN2 digits to csum_base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer "% 8" keeps the shift at 0 rather than 8 when WOTS_LEN2 * WOTS_LOGW is a multiple of 8. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths: WOTS_LEN1 message digits followed by the checksum digits. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed,
+        const unsigned char *pub_seed, uint32_t addr[8],
+        const hash_state *state_seeded) {
+    uint32_t i;
+    unsigned int j;
+
+    /* Four 8-word addresses stored back to back, one per parallel lane. */
+    uint32_t addrx4[4 * 8];
+    /* Scratch for four N-byte chain values; results are copied out to pk. */
+    unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N];
+
+    /* Every lane starts from a copy of the caller's address. */
+    for (j = 0; j < 4; j++) {
+        memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8);
+    }
+
+    /* The last iteration typically does not have complete set of 4 chains,
+       but because we use pkbuf, this is not an issue -- we still do as many
+       in parallel as possible. */
+    /* (WOTS_LEN + 3) & ~0x3 rounds the chain count up to a multiple of 4. */
+    for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) {
+        for (j = 0; j < 4; j++) {
+            PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_chain_addr(addrx4 + j * 8, i + j);
+        }
+        /* wots_gen_skx4 is defined outside this view; presumably it fills
+           pkbuf with the four chain start values for these addresses. */
+        wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded);
+        /* Walk all four chains in place from position 0 for WOTS_W - 1 steps. */
+        gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded);
+        /* Copy out only lanes that map to a real chain; lanes with
+           i + j >= WOTS_LEN are padding from the round-up and are dropped. */
+        for (j = 0; j < 4; j++) {
+            if (i + j < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN) {
+                memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N);
+            }
+        }
+    }
+
+    // state_seeded is already used above; this cast is redundant but harmless.
+    (void)state_seeded;
+}
+
+/**
+ * Takes a n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg,
+        const unsigned char *sk_seed, const unsigned char *pub_seed,
+        uint32_t addr[8], const hash_state *state_seeded) {
+    /* One chain length per WOTS chain: message digits, then checksum digits. */
+    unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN];
+    uint32_t i;
+
+    chain_lengths(lengths, msg);
+
+    /* addr is updated in place: its chain field is rewritten each iteration. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_chain_addr(addr, i);
+        /* wots_gen_sk and gen_chain are defined outside this view; presumably
+           the first writes the chain's secret start value into slot i of sig
+           and the second advances it lengths[i] steps in place. */
+        wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sk_seed, addr, state_seeded);
+        gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded);
+    }
+
+    // state_seeded is passed to the helpers above; the cast is redundant but harmless.
+    (void)state_seeded;
+}
+
+/**
+ * Takes a WOTS signature and an n-byte message, computes a WOTS public key.
+ *
+ * Writes the computed public key to 'pk'.
+ *
+ * Slot i of sig is read as the value at chain position lengths[i] and is
+ * advanced by the remaining WOTS_W - 1 - lengths[i] steps into slot i of pk.
+ * addr is updated in place (its chain field is rewritten each iteration).
+ */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk,
+        const unsigned char *sig, const unsigned char *msg,
+        const unsigned char *pub_seed, uint32_t addr[8],
+        const hash_state *state_seeded) {
+    unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN];
+    uint32_t i;
+
+    chain_lengths(lengths, msg);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_set_chain_addr(addr, i);
+        gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_N,
+                  lengths[i], PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr,
+                  state_seeded);
+    }
+
+    // state_seeded is passed to gen_chain above; the cast is redundant but harmless.
+    (void)state_seeded;
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/wots.h
new file mode 100644
index 00000000..8761e45a
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/avx2/wots.h
@@ -0,0 +1,41 @@
+#ifndef
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..b1892878 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128f-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h
+
+$(LIBRARY): $(OBJECTS)
+	LIB.EXE /NOLOGO /WX /OUT:$@ $**
+
+clean:
+	-DEL $(OBJECTS)
+	-DEL $(LIBRARY)
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/address.c
new file mode 100644
index 00000000..dd01f401
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/address.c
@@ -0,0 +1,78 @@
+#include
+
+#include "address.h"
+#include "params.h"
+#include "utils.h"
+
+/* Serializes the 8-word address into 32 bytes: word i is written to
+   bytes[4*i .. 4*i+3] by ull_to_bytes (byte order is whatever that helper
+   produces; it is defined outside this view). */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_addr_to_bytes(
+    unsigned char *bytes, const uint32_t addr[8]) {
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ull_to_bytes(
+            bytes + i * 4, 4, addr[i]);
+    }
+}
+
+/* Stores the layer number in word 0. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_layer_addr(
+    uint32_t addr[8], uint32_t layer) {
+    addr[0] = layer;
+}
+
+/* Stores the 64-bit tree index in words 2 (high half) and 3 (low half);
+   word 1 is cleared. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_addr(
+    uint32_t addr[8], uint64_t tree) {
+    addr[1] = 0;
+    addr[2] = (uint32_t) (tree >> 32);
+    addr[3] = (uint32_t) tree;
+}
+
+/* Stores the address type in word 4. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type(
+    uint32_t addr[8], uint32_t type) {
+    addr[4] = type;
+}
+
+/* Copies words 0..3 (layer and tree fields) from in to out; words 4..7 of
+   out are left untouched. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_subtree_addr(
+    uint32_t out[8], const uint32_t in[8]) {
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+}
+
+/* These functions are used for OTS addresses.
*/
+
+/* Stores the key pair index in word 5. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr(
+    uint32_t addr[8], uint32_t keypair) {
+    addr[5] = keypair;
+}
+
+/* Copies words 0..3 (layer and tree fields) and word 5 (key pair) from in
+   to out; words 4, 6 and 7 of out are left untouched. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr(
+    uint32_t out[8], const uint32_t in[8]) {
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+    out[5] = in[5];
+}
+
+/* Stores the chain index in word 6 (the same word set_tree_height uses). */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_chain_addr(
+    uint32_t addr[8], uint32_t chain) {
+    addr[6] = chain;
+}
+
+/* Stores the hash index in word 7 (the same word set_tree_index uses). */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_hash_addr(
+    uint32_t addr[8], uint32_t hash) {
+    addr[7] = hash;
+}
+
+/* These functions are used for all hash tree addresses (including FORS). */
+
+/* Stores the tree height in word 6. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height(
+    uint32_t addr[8], uint32_t tree_height) {
+    addr[6] = tree_height;
+}
+
+/* Stores the index within the tree level in word 7. */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index(
+    uint32_t addr[8], uint32_t tree_index) {
+    addr[7] = tree_index;
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/address.h
new file mode 100644
index 00000000..b756efb7
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/address.h
@@ -0,0 +1,50 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDRESS_H
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDRESS_H
+
+#include
+
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS 0
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4
+
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_addr_to_bytes(
+    unsigned char *bytes, const uint32_t addr[8]);
+
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_layer_addr(
+    uint32_t addr[8], uint32_t layer);
+
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_addr(
+    uint32_t addr[8],
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/api.h new file mode 100644 index 00000000..fce0e50b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_BYTES 16976 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */
+int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_open(
+    uint8_t *m, size_t *mlen,
+    const uint8_t *sm, size_t smlen, const uint8_t *pk);
+
+#endif
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/fors.c
new file mode 100644
index 00000000..e7ccfffa
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/fors.c
@@ -0,0 +1,161 @@
+#include
+#include
+#include
+
+#include "address.h"
+#include "fors.h"
+#include "hash.h"
+#include "hash_state.h"
+#include "thash.h"
+#include "utils.h"
+
+/* Derives the FORS secret element for fors_leaf_addr into sk by calling
+   prf_addr with sk_seed as the key. */
+static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed,
+                        uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) {
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_prf_addr(
+        sk, sk_seed, fors_leaf_addr, hash_state_seeded);
+}
+
+/* Turns a FORS secret element into its leaf value with a single-block
+   thash under pub_seed and fors_leaf_addr. leaf may alias sk
+   (fors_gen_leaf calls it that way). */
+static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,
+                            const unsigned char *pub_seed,
+                            uint32_t fors_leaf_addr[8],
+                            const hash_state *hash_state_seeded) {
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_1(
+        leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded);
+}
+
+/* Computes the FORS leaf with tree index addr_idx: derives the secret
+   element into 'leaf', then hashes it in place. Passed as the leaf
+   generator callback to treehash_FORS_HEIGHT by fors_sign. */
+static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed,
+                          const unsigned char *pub_seed,
+                          uint32_t addr_idx, const uint32_t fors_tree_addr[8],
+                          const hash_state *hash_state_seeded) {
+    uint32_t fors_leaf_addr[8] = {0};
+
+    /* Only copy the parts that must be kept in fors_leaf_addr. */
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr(
+        fors_leaf_addr, fors_tree_addr);
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type(
+        fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE);
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index(
+        fors_leaf_addr, addr_idx);
+
+    fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded);
+    fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded);
+}
+
+/**
+ * Interprets m as PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES bits.
+ * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES integers.
+ *
+ * Bits are read least-significant first within each byte of m, and the
+ * j-th bit read for a tree becomes bit j of that tree's index.
+ */
+static void message_to_indices(uint32_t *indices, const unsigned char *m) {
+    unsigned int i, j;
+    /* Running bit position in m; never reset, so consecutive trees consume
+       consecutive bit ranges. */
+    unsigned int offset = 0;
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES; i++) {
+        indices[i] = 0;
+        for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT; j++) {
+            /* offset >> 3 selects the byte, offset & 0x7 the bit inside it. */
+            indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;
+            offset++;
+        }
+    }
+}
+
+/**
+ * Signs a message m, deriving the secret key from sk_seed and the FTS address.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES bits.
+ */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_fors_sign(
+    unsigned char *sig, unsigned char *pk,
+    const unsigned char *m,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    const uint32_t fors_addr[8], const hash_state *hash_state_seeded) {
+    uint32_t indices[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES];
+    /* One N-byte root per FORS tree, hashed together at the end. */
+    unsigned char roots[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N];
+    uint32_t fors_tree_addr[8] = {0};
+    uint32_t fors_pk_addr[8] = {0};
+    uint32_t idx_offset;
+    unsigned int i;
+
+    /* Both working addresses inherit the layer/tree/key pair words of
+       fors_addr and differ only in their type word. */
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr(
+        fors_tree_addr, fors_addr);
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr(
+        fors_pk_addr, fors_addr);
+
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type(
+        fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE);
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type(
+        fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSPK);
+
+    message_to_indices(indices, m);
+
+    /* Per tree, sig receives N bytes of secret element followed by
+       FORS_HEIGHT * N bytes that treehash_FORS_HEIGHT writes. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES; i++) {
+        /* Tree i owns the leaf index range starting at i * 2^FORS_HEIGHT. */
+        idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT);
+
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height(
+            fors_tree_addr, 0);
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index(
+            fors_tree_addr, indices[i] + idx_offset);
+
+        /* Include the secret key part that produces the selected leaf node. */
+        fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N;
+
+        /* Compute the authentication path for this leaf node. */
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_FORS_HEIGHT(
+            roots + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, sig, sk_seed, pub_seed,
+            indices[i], idx_offset, fors_gen_leaf, fors_tree_addr,
+            hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT;
+    }
+
+    /* Hash horizontally across all tree roots to derive the public key. */
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_FORS_TREES(
+        pk, roots, pub_seed, fors_pk_addr, hash_state_seeded);
+}
+
+/**
+ * Derives the FORS public key from a signature.
+ * This can be used for verification by comparing to a known public key, or to
+ * subsequently verify a signature on the derived public key. The latter is the
+ * typical use-case when used as an FTS below an OTS in a hypertree.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES bits.
+ *
+ * sig is read with the layout fors_sign produces: per tree, N bytes of
+ * secret element followed by FORS_HEIGHT * N bytes of authentication path.
+ */
+void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_fors_pk_from_sig(
+    unsigned char *pk,
+    const unsigned char *sig, const unsigned char *m,
+    const unsigned char *pub_seed, const uint32_t fors_addr[8],
+    const hash_state *hash_state_seeded) {
+    uint32_t indices[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES];
+    unsigned char roots[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N];
+    unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N];
+    uint32_t fors_tree_addr[8] = {0};
+    uint32_t fors_pk_addr[8] = {0};
+    uint32_t idx_offset;
+    unsigned int i;
+
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr);
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr);
+
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE);
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSPK);
+
+    message_to_indices(indices, m);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES; i++) {
+        /* Same per-tree leaf index base as in fors_sign. */
+        idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT);
+
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0);
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset);
+
+        /* Derive the leaf from the included secret key part. */
+        fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N;
+
+        /* Derive the corresponding root node of this tree. */
+        PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig,
+                PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT;
+    }
+
+    /* Hash horizontally across all tree roots to derive the public key. */
+    PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded);
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/fors.h
new file mode 100644
index 00000000..cd138519
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/fors.h
@@ -0,0 +1,32 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_H
+#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_H
+
+#include
+
+#include "hash_state.h"
+#include "params.h"
+
+/**
+ * Signs a message m, deriving the secret key from sk_seed and the FTS address.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash.h new file mode 100644 index 00000000..79751b2c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash_shake256.c new file mode 100644 index 00000000..1719f18a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/params.h new file mode 100644 index 00000000..c76018b0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FULL_HEIGHT 60 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D 20 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT 9 +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/sign.c new file mode 100644 index 00000000..cdfae4bc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/thash.h new file mode 100644 index 00000000..3e3f09c3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/thash_shake256_simple.c 
b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/thash_shake256_simple.c new file mode 100644 index 00000000..05e1c285 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/utils.c new file mode 100644 index 00000000..1005cf97 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. 
+ * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_FORS_HEIGHT, 
gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/utils.h new file mode 100644 index 00000000..a1385f6b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. 
+ * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git 
a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/wots.c new file mode 100644 index 00000000..913fe9bc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. 
*/ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/wots.h new file mode 100644 index 00000000..a6b25fc6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128f-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_H +#define 
PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include <stdint.h> + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/META.yml b/crypto_sign/sphincs/sphincs-shake256-128s-robust/META.yml new file mode 100644 index 00000000..ac23d88a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 8080 +testvectors-sha256: f3f56ddff38a75ee07b44c023b9c9133ffe9538bb4b64f8ec8742b21fcaa6a50 +nistkat-sha256: e9c31937277677d1cb387ce76408c76b0128938f3af047f60fb5d073a3c788b3 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..ff36d1f9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128s-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj hash_shake256x4.obj thash_shake256_robustx4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): $(KECCAK4XDIR)/*.c $(KECCAK4XDIR)/*.h $(KECCAK4XDIR)/*.macros + cd $(KECCAK4XDIR) && $(MAKE) /$(MAKEFLAGS) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/address.c new file mode 100644 index 00000000..64330da4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/address.h new file mode 100644 index 00000000..9e1da219 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h new file mode 100644 index 00000000..c0c03a0a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fips202x4.c new file mode 100644 index 00000000..b23ebc63 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_xor_si256(s[i], s[i]); + } + + /* absorb 4 message of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i 
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_xor_si256(s[i], s[i]); + } + + /* absorb 4 message of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fips202x4.h new file mode 100644 index 00000000..fd35c873 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + 
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fors.c new file mode 100644 index 00000000..f8580887 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fors.h new file mode 100644 index 00000000..58cb8b20 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash.h new file mode 100644 index 00000000..72fb9b92 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_shake256.c new file mode 100644 index 00000000..c24b61e6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_shake256x4.c new file mode 100644 index 00000000..c031b0bb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_state.h new file mode 100644 index 00000000..908b8af8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hashx4.h new file mode 100644 index 00000000..d8c6cd78 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/params.h new file mode 100644 index 00000000..e589170b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES 10 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N / PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/sign.c new file mode 100644 index 00000000..b75ce648 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]
* Format pk: [PUB_SEED || root]
*/
int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_seed_keypair(
    uint8_t *pk, uint8_t *sk, const uint8_t *seed) {
    /* Positions of PUB_SEED and root inside sk, per the layout above. */
    uint8_t *sk_pub_seed = sk + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N;
    uint8_t *sk_root = sk + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N;

    /* The shared treehash routine always writes an authentication path.
       Key generation only wants the root, so the path goes to scratch. */
    unsigned char scratch_auth_path[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N];
    uint32_t top_tree_addr[8] = {0};
    hash_state hash_state_seeded;

    /* Address the top-most layer (D - 1) as a hash tree. */
    PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_layer_addr(
        top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D - 1);
    PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type(
        top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE);

    /* One copy fills SK_SEED, SK_PRF and PUB_SEED from the seed. */
    memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SEEDBYTES);

    /* The public key begins with PUB_SEED. */
    memcpy(pk, sk_pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N);

    /* Let the hash instantiation prepare whatever state it needs from the
       public seed (pk) and the secret seed (sk). */
    PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk);

    /* Root of the top-most subtree, written straight into sk. */
    PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash_TREE_HEIGHT(
        sk_root, scratch_auth_path, sk, sk_pub_seed, 0, 0,
        wots_gen_leaf, top_tree_addr, &hash_state_seeded);

    /* The public key ends with the same root. */
    memcpy(pk + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sk_root, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N);

    return 0;
}

/*
 * Generates an SPX key pair.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash.h new file mode 100644 index 00000000..957cd065 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash_shake256_robust.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash_shake256_robust.c new file mode 100644 index 00000000..bc1d72ed --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash_shake256_robustx4.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash_shake256_robustx4.c new file mode 100644 index 00000000..79d43880 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thash_shake256_robustx4.c @@ -0,0 +1,66 @@ +#include +#include + +#include "thashx4.h" +#include "address.h" +#include "params.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thash_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thashx4_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) \ + { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char bitmask0[(inblocks) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char bitmask1[(inblocks) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char bitmask2[(inblocks) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned char bitmask3[(inblocks) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned int i; \ 
+ \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(buf0 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, addrx4 + 0*8); \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, addrx4 + 1*8); \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, addrx4 + 2*8); \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, addrx4 + 3*8); \ + \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES); \ + \ + for (i = 0; i < (inblocks) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; i++) { \ + buf0[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \ + buf1[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \ + buf2[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \ + buf3[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \ + } \ + \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_shake256x4( \ + out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + \ + /* avoid unused parameter 
warning */ \ + (void)state_seeded; \ + } + + +thash_variant(1, 1) +thash_variant(2, 2) +thash_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN) +thash_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thashx4.h new file mode 100644 index 00000000..6efe2d0a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utils.c new file mode 100644 index 00000000..48432b2b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + 
unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utils.h new file mode 100644 index 00000000..bc26b115 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utilsx4.c 
b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utilsx4.c new file mode 100644 index 00000000..dbba010c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utilsx4.h new file mode 100644 index 00000000..40b55ace --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/wots.c new file mode 100644 index 00000000..9c48e918 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N); + } + } + } + + // Get rid of unused argument variable. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/wots.h new file mode 100644 index 00000000..48ed1532 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..ec29e38b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128s-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/address.c new file mode 100644 index 00000000..c2dc7835 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/address.h new file mode 100644 index 00000000..89997a5a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/api.h new file mode 100644 index 00000000..0c84a169 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* + 
* Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key.
+ */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/fors.c new file mode 100644 index 00000000..f0c90b88 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/fors.h new file mode 100644 index 00000000..7255523b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash.h new file mode 100644 index 00000000..3a5182ba --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash_shake256.c new file mode 100644 index 00000000..53a99814 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/params.h new file mode 100644 index 00000000..52050aa1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES 10 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* Length of a serialized address in bytes (8 words of 4 bytes each). */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N / PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/sign.c new file mode 100644 index 00000000..f9d1bbf7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SEEDBYTES. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/thash.h new file mode 100644 index 00000000..ef878287 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/thash_shake256_robust.c 
b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/thash_shake256_robust.c new file mode 100644 index 00000000..29ea339d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/utils.c new file mode 100644 index 00000000..1ea4dc46 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/utils.h new file mode 100644 index 00000000..8647177f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/wots.c new file mode 100644 index 00000000..5758edcb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? 
+ +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. 
+ */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/wots.h new file mode 100644 index 00000000..d250b481 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32-byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/META.yml b/crypto_sign/sphincs/sphincs-shake256-128s-simple/META.yml new file mode 100644 index 00000000..aaf85862 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 1 +length-public-key: 32 +length-secret-key: 64 +length-signature: 8080 +testvectors-sha256: ee2af38333f6ba705102ab66689c262b07c1fd9ce1d46180796bcb263bf1a654 +nistkat-sha256: 5d23c9f334e9bd99d5294cf40c6b2c096ee668076e809b44b928ca146d2c5e3a +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..a887d7a1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128s-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj hash_shake256x4.obj thash_shake256_simplex4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/address.c new file mode 100644 index 00000000..ee01ccd0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/address.h new file mode 100644 index 00000000..bdc869a7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h new file mode 100644 index 00000000..8fccf5d1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fips202x4.c new file mode 100644 index 00000000..58f98b3f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* Zero the state explicitly: s is uninitialized here and must not be read (no XOR-with-self). */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* Zero the state explicitly: s is uninitialized here and must not be read (no XOR-with-self). */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fips202x4.h new file mode 100644 index 00000000..30b73493 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fors.c new file mode 100644 index 00000000..572a45b1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fors.h new file mode 100644 index 00000000..769215eb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash.h new file mode 100644 index 00000000..87de0a72 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_shake256.c new file mode 100644 index 00000000..15ed03b3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_shake256x4.c new file mode 100644 index 00000000..26f64385 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_state.h new file mode 100644 index 00000000..5a3baad3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hashx4.h new file mode 100644 index 00000000..7b3e1b76 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/params.h new file mode 100644 index 00000000..211e4b92 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES 10 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/sign.c new file mode 100644 index 00000000..6c114dbd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */
+static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed,
+                          const unsigned char *pub_seed,
+                          uint32_t addr_idx, const uint32_t tree_addr[8],
+                          const hash_state *hash_state_seeded) {
+    unsigned char pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES]; /* WOTS public key: filled by wots_gen_pk, consumed by thash_WOTS_LEN */
+    uint32_t wots_addr[8] = {0}; /* address passed to wots_gen_pk */
+    uint32_t wots_pk_addr[8] = {0}; /* address passed to thash_WOTS_LEN */
+
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(
+        wots_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTS);
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(
+        wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTSPK);
+
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_subtree_addr(
+        wots_addr, tree_addr);
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr(
+        wots_addr, addr_idx);
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_gen_pk(
+        pk, sk_seed, pub_seed, wots_addr, hash_state_seeded);
+
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr(
+        wots_pk_addr, wots_addr); /* must follow set_keypair_addr above: it copies from wots_addr */
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_WOTS_LEN(
+        leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); /* leaf is the thash_WOTS_LEN output over pk */
+}
+
+/*
+ * Returns the length of a secret key, in bytes
+ * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES).
+ */
+size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_secretkeybytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES;
+}
+
+/*
+ * Returns the length of a public key, in bytes
+ * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES).
+ */
+size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_publickeybytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES;
+}
+
+/*
+ * Returns the length of a signature, in bytes
+ * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_BYTES).
+ */
+size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_bytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_BYTES;
+}
+
+/*
+ * Returns the length of the seed required to generate a key pair, in bytes
+ * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES).
+ */
+size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_seedbytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES;
+}
+
+/*
+ * Generates an SPX key pair given a seed of length
+ * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]
+ * Format pk: [PUB_SEED || root]
+ *
+ * The seed is copied verbatim into the front of sk; the root is then
+ * computed with the treehash routine and appended to both sk and pk.
+ * Always returns 0.
+ */
+int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_seed_keypair(
+    uint8_t *pk, uint8_t *sk, const uint8_t *seed) {
+    /* We do not need the auth path in key generation, but it simplifies the
+       code to have just one treehash routine that computes both root and path
+       in one function. */
+    unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N];
+    uint32_t top_tree_addr[8] = {0};
+    hash_state hash_state_seeded;
+
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_layer_addr(
+        top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D - 1);
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type(
+        top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE);
+
+    /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */
+    memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES);
+
+    memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); /* pk starts with PUB_SEED, the third N-byte field of sk */
+
+    /* This hook allows the hash function instantiation to do whatever
+       preparation or computation it needs, based on the public seed. */
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk);
+
+    /* Compute root node of the top-most subtree. The root is written
+       straight into the fourth N-byte field of sk; auth_path is discarded. */
+    PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash_TREE_HEIGHT(
+        sk + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, 0, 0,
+        wots_gen_leaf, top_tree_addr, &hash_state_seeded);
+
+    memcpy(pk + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); /* second half of pk is the root just stored in sk */
+
+    return 0;
+}
+
+/*
+ * Generates an SPX key pair.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash.h new file mode 100644 index 00000000..385ec0d8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash_shake256_simple.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash_shake256_simple.c new file mode 100644 index 00000000..235bc859 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash_shake256_simplex4.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash_shake256_simplex4.c new file mode 100644 index 00000000..973f0b0e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thash_shake256_simplex4.c @@ -0,0 +1,47 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thashx4.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thashx4_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thashx4_##name( \ + unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, \ + const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, \ + const unsigned char *in3, const unsigned char *pub_seed, uint32_t addrx4[4 * 8], \ + const hash_state *state_seeded) { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; \ + \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(buf0 + 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, addrx4 + 0 * 8); \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, addrx4 + 1 * 8); \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, addrx4 + 2 * 8); \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, addrx4 + 3 * 8); \ + memcpy(buf0 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES, in0, (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + memcpy(buf1 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES, in1, (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + memcpy(buf2 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES, in2, (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + memcpy(buf3 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES, in3, (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_shake256x4(out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, buf0, buf1, buf2, buf3, \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + \ + /* Avoid unused parameter warning */ \ + (void)state_seeded; \ + } + +thashx4_variant(1, 1) +thashx4_variant(2, 2) +thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN) +thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thashx4.h new file mode 100644 index 00000000..38735609 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utils.c new file mode 100644 index 00000000..11a1d1fa --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order (out[outlen - 1] receives the least significant byte; bits of 'in' above 8 * outlen are discarded). + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness: each pass stores the low byte of 'in' and then shifts it out. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer (in[0] is the most significant byte; returns 0 when inlen is 0). + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + /* NOTE(review): the shift count below reaches 8 * (inlen - 1); confirm callers never pass inlen larger than sizeof(unsigned long long), otherwise the shift is undefined. */ + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utils.h new file mode 100644 index 00000000..43ba1b09 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utilsx4.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utilsx4.c new file mode 100644 index 00000000..7be40219 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utilsx4.h new file mode 100644 index 00000000..4dcc037e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/wots.c new file mode 100644 index 00000000..1e259d02 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk (four consecutive N-byte outputs in skx4, one per 8-word address block of wots_addrx4). + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed in each of the four address blocks. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate the four sk elements with one prf_addrx4 call. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain; it is modified, since set_hash_addr is applied to it on every step. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function, stopping early if position WOTS_W is reached; out is hashed in place. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and steps are identical across chains.
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start' (all four N-byte lanes in one copy). */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function, stopping early if position WOTS_W is reached; each call hashes the four lanes of outx4 in place. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w (most significant digit of each byte first) and writes out_len digits to output. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w).
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum: sum of (WOTS_W - 1 - digit) over the WOTS_LEN1 message digits. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w, writing WOTS_LEN2 digits to csum_base_w. */ + /* Make sure expected empty zero bits are the least significant bits. NOTE(review): when WOTS_LEN2 * WOTS_LOGW is a multiple of 8 this shifts by 8 rather than 0; confirm no parameter set in use hits that case. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths: WOTS_LEN1 message digits followed by the WOTS_LEN2 checksum digits. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); /* give each of the 4 lanes its own copy of addr */ + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible; only chains with index below WOTS_LEN are copied to pk. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N); + } + } + } + + // state_seeded is already passed to the helpers above; the cast below is a harmless no-op. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); /* one step count per chain, derived from msg */ + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); /* walk chain i from position 0 for lengths[i] steps, in place inside sig */ + } + + // avoid unused argument + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. The caller's addr is passed to set_chain_addr for every chain, so it is modified. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); /* resume chain i at position lengths[i] and take the remaining W - 1 - lengths[i] steps */ + } + + // avoid unused argument + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/wots.h new file mode 100644 index 00000000..afd5712f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..de8b00bc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-128s-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/address.c new file mode 100644 index 00000000..35757e54 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); /* word i is serialized into the 4 bytes starting at bytes + 4 * i */ + } +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; /* word 0: layer */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; /* words 1..3: tree address; word 1 is always cleared */ + addr[2] = (uint32_t) (tree >> 32); /* high 32 bits of tree */ + addr[3] = (uint32_t) tree; /* low 32 bits of tree */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; /* word 4: address type */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; /* only words 0..3 (layer and tree) are copied; words 4..7 of out are left untouched */ +} + +/* These functions are used for WOTS and FORS addresses.
*/ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; /* word 5: key pair index */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; /* words 0..3 and 5 are copied; the type word (4) and words 6..7 of out are left untouched */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; /* word 6: chain index */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; /* word 7: hash index */ +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; /* same word 6 that set_chain_addr writes */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; /* same word 7 that set_hash_addr writes */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/address.h new file mode 100644 index 00000000..d17e9d2f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8],
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/api.h new file mode 100644 index 00000000..053f985c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 32 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_BYTES 8080 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES 48 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* + 
* Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/fors.c new file mode 100644 index 00000000..3a9b69ae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); /* secret element sk = prf_addr keyed with sk_seed at the given leaf address */ +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); /* leaf = thash_1 of sk under pub_seed and the leaf address */ +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; /* fresh zeroed address, filled in below */ + + /* Only copy the parts that must be kept in fors_leaf_addr.
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); /* leaf buffer temporarily holds the secret element */ + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); /* then it is hashed in place into the leaf value */ +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; /* bit j of index i is message bit 'offset', counted from the least significant bit of each byte */ + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; /* one N-byte root per FORS tree */ + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); /* two working addresses derived from fors_addr: one for tree nodes, one for the final public-key hash */ + + message_to_indices(indices, m); /* one leaf index per tree */ + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT); /* tree i owns leaf indices starting at i * 2^FORS_HEIGHT */ + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node.
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT; /* advance sig by FORS_HEIGHT * N bytes to the slot of the next tree */ + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; /* one N-byte root per FORS tree */ + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; /* leaf recomputed from the secret element carried in sig */ + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type(fors_pk_addr,
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); /* one leaf index per tree */ + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT); /* tree i owns leaf indices starting at i * 2^FORS_HEIGHT */ + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT; /* advance past the FORS_HEIGHT * N bytes just handed to compute_root */ + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/fors.h new file mode 100644 index 00000000..4a6ce0b8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash.h new file mode 100644 index 00000000..30bac947 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash_shake256.c new file mode 100644 index 00000000..7fca240c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/params.h new file mode 100644 index 00000000..e4d0ec49 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N 16 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT 15 +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES 10 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/sign.c new file mode 100644 index 00000000..a02739cf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/thash.h new file mode 100644 index 00000000..dd570353 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/thash_shake256_simple.c 
b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/thash_shake256_simple.c new file mode 100644 index 00000000..1de75bf8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/utils.c new file mode 100644 index 00000000..da9c0914 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. 
+ * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < ((uint32_t)1 << tree_height); idx++) { /* shift in uint32_t: (1 << 31) on int is undefined */ + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating.
*/ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_FORS_HEIGHT,
gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/utils.h new file mode 100644 index 00000000..d399d123 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. 
+ * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git 
a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/wots.c new file mode 100644 index 00000000..7ce0bb89 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 keeps the shift at 0 (not 8) when the digits fill whole bytes. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths.
*/ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/wots.h new file mode 100644 index 00000000..787b2971 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-128s-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_H +#define 
PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256128SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/META.yml b/crypto_sign/sphincs/sphincs-shake256-192f-robust/META.yml new file mode 100644 index 00000000..962c5023 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 35664 +testvectors-sha256: de65b2a7b6d5e819f58b6e1a08ec4ef3308a9c36b7c962450105f82263e35e98 +nistkat-sha256: 5cfcf998ad0bedf8e6b961c8891048f456d6422d3b4a26fcb095a913c9efd03e +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..4b4e5134 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192f-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj hash_shake256x4.obj thash_shake256_robustx4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/address.c new file mode 100644 index 00000000..8cf6b151 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/address.h new file mode 100644 index 00000000..f8fb1505 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h new file mode 100644 index 00000000..aefa2bb2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fips202x4.c new file mode 100644 index 00000000..31388004 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + +/* SHAKE128 over four equal-length inputs in parallel: absorbs inlen bytes from each of in0..in3 (rate SHAKE128_RATE, padding byte 0x1F) and writes outlen bytes to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state; set it directly instead of XORing the uninitialized s[i] with itself, which reads an indeterminate value */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + +/* SHAKE256 over four equal-length inputs in parallel: absorbs inlen bytes from each of in0..in3 (rate SHAKE256_RATE, padding byte 0x1F) and writes outlen bytes to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state; set it directly instead of XORing the uninitialized s[i] with itself, which reads an indeterminate value */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fips202x4.h new file mode 100644 index 00000000..936fa6f0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fors.c new file mode 100644 index 00000000..0dd32a42 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fors.h new file mode 100644 index 00000000..3463a5a5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash.h new file mode 100644 index 00000000..324ea40a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_shake256.c new file mode 100644 index 00000000..08cc063e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_shake256x4.c new file mode 100644 index 00000000..c780e4bb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_state.h new file mode 100644 index 00000000..7e4e5b29 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hashx4.h new file mode 100644 index 00000000..67770980 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/params.h new file mode 100644 index 00000000..2f8324ab --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FULL_HEIGHT 66 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES 33 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N / PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/sign.c new file mode 100644 index 00000000..d3a805fd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash.h new file mode 100644 index 00000000..785271c9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash_shake256_robust.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash_shake256_robust.c new file mode 100644 index 00000000..b7c03603 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash_shake256_robustx4.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash_shake256_robustx4.c new file mode 100644 index 00000000..7e6614c9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thash_shake256_robustx4.c @@ -0,0 +1,66 @@ +#include +#include + +#include "thashx4.h" +#include "address.h" +#include "params.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thash_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thashx4_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) \ + { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char bitmask0[(inblocks) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char bitmask1[(inblocks) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char bitmask2[(inblocks) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned char bitmask3[(inblocks) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned int i; \ 
+ \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(buf0 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, addrx4 + 0*8); \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, addrx4 + 1*8); \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, addrx4 + 2*8); \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, addrx4 + 3*8); \ + \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES); \ + \ + for (i = 0; i < (inblocks) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; i++) { \ + buf0[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \ + buf1[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \ + buf2[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \ + buf3[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \ + } \ + \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_shake256x4( \ + out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + \ + /* avoid unused parameter 
warning */ \ + (void)state_seeded; \ + } + + +thash_variant(1, 1) +thash_variant(2, 2) +thash_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN) +thash_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thashx4.h new file mode 100644 index 00000000..d302a3fe --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utils.c new file mode 100644 index 00000000..0a79e8f3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + 
unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utils.h new file mode 100644 index 00000000..86138108 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utilsx4.c 
b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utilsx4.c new file mode 100644 index 00000000..525fbe66 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utilsx4.h new file mode 100644 index 00000000..4de0428e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/wots.c new file mode 100644 index 00000000..67a431fb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits (no shift when the digits already fill whole bytes). */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N); + } + } + } + + // state_seeded is already used above; this cast is a harmless leftover. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // state_seeded is already used above; this cast is a harmless leftover. + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // state_seeded is already used above; this cast is a harmless leftover. + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/wots.h new file mode 100644 index 00000000..d7045976 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef
PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..77185f6d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192f-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/address.c new file mode 100644 index 00000000..324dd196 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/address.h new file mode 100644 index 00000000..36da1aa6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/api.h new file mode 100644 index 00000000..a749a41d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Writes a detached signature of m to sig and stores its length in *siglen. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/fors.c new file mode 100644 index 00000000..b6011eee --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/fors.h new file mode 100644 index 00000000..dcad3583 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash.h new file mode 100644 index 00000000..34944417 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash_shake256.c new file mode 100644 index 00000000..08e93bf6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/params.h new file mode 100644 index 00000000..9c626a1d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FULL_HEIGHT 66 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES 33 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N / PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/sign.c new file mode 100644 index 00000000..d5c8c200 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SEEDBYTES. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/thash.h new file mode 100644 index 00000000..b2025e41 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/thash_shake256_robust.c 
b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/thash_shake256_robust.c new file mode 100644 index 00000000..c8694fa5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/utils.c new file mode 100644 index 00000000..2f1318c9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/utils.h new file mode 100644 index 00000000..2b6363a4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/wots.c new file mode 100644 index 00000000..9307bdea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? 
+ +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. 
+ */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a n-byte message and the 32-byte sk_see to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/wots.h new file mode 100644 index 00000000..572232ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/META.yml b/crypto_sign/sphincs/sphincs-shake256-192f-simple/META.yml new file mode 100644 index 00000000..0bd18762 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 35664 +testvectors-sha256: 14f60a3099cfddf30c46491a98a5f3508739df108425b2eaa5c19383f0ca4b22 +nistkat-sha256: 28528adef75a728d013bb493d85e358a75344c72000792419f1f539c16f24f10 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..2b992f30 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192f-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj hash_shake256x4.obj thash_shake256_simplex4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/address.c new file mode 100644 index 00000000..b8ee1f9b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/address.h new file mode 100644 index 00000000..2c4ef856 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h new file mode 100644 index 00000000..7d132630 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fips202x4.c new file mode 100644 index 00000000..01dbf7da --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + +/* Permutes the state, then stores one r-byte block per lane to h0..h3; repeated nblocks times. */ +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + +/* Four-way parallel SHAKE128: absorbs in0..in3 (inlen bytes each) into the interleaved state s + * and writes outlen bytes of output to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* Zero the state. s is still uninitialized here, so it is overwritten instead of being XOR-ed with itself, which would read indeterminate values. */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* Absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze all whole rate-sized blocks directly into the outputs */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { /* partial last block: squeeze into t0..t3, copy only the remaining bytes */ + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + +/* Four-way parallel SHAKE256; same contract as the shake128x4 function above, using the SHAKE256 rate. */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* Zero the state. s is still uninitialized here, so it is overwritten instead of being XOR-ed with itself, which would read indeterminate values. */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* Absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze all whole rate-sized blocks directly into the outputs */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { /* partial last block: squeeze into t0..t3, copy only the remaining bytes */ + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fips202x4.h new file mode 100644 index 00000000..c400b867 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fors.c new file mode 100644 index 00000000..517554cf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+/** + * Generates four FORS leaves at once. Each of the four lane addresses keeps + * only the layer, tree and keypair words of fors_tree_addr, gets type + * FORSTREE and one of the tree indices addr_idx0..addr_idx3; the leaf buffers + * first receive the secret values and are then hashed in place into leaves. + */ +static void fors_gen_leafx4(unsigned char *leaf0, unsigned char *leaf1, + unsigned char *leaf2, unsigned char *leaf3, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr_idx0, uint32_t addr_idx1, + uint32_t addr_idx2, uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + uint32_t *lane_addr; + unsigned int lane; + + for (lane = 0; lane < 4; lane++) { + lane_addr = &fors_leaf_addrx4[lane * 8]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr(lane_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type(lane_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(&fors_leaf_addrx4[0 * 8], addr_idx0); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(&fors_leaf_addrx4[1 * 8], addr_idx1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(&fors_leaf_addrx4[2 * 8], addr_idx2); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(&fors_leaf_addrx4[3 * 8], addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES integers.
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fors.h new file mode 100644 index 00000000..98e84c18 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash.h new file mode 100644 index 00000000..5c0142c8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_shake256.c new file mode 100644 index 00000000..9b19a6d9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_shake256x4.c new file mode 100644 index 00000000..8f1cd883 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_state.h new file mode 100644 index 00000000..a4d6ea2c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hashx4.h new file mode 100644 index 00000000..cf28794f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/params.h new file mode 100644 index 00000000..49650dc9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FULL_HEIGHT 66 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES 33 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/sign.c new file mode 100644 index 00000000..6f9f92e9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES. 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee 16-byte alignment of sk (the union member must be a __m128 value; a pointer only gives pointer alignment) + union { + __m128 _x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee 16-byte alignment of sig; the result is copied back to the caller's buffer at the end + union { + __m128 _x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee 16-byte alignment of pk (the union member must be a __m128 value; a pointer only gives pointer alignment) + union { + __m128 _x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee 16-byte alignment of pk (the union member must be a __m128 value; a pointer only gives pointer alignment) + union { + __m128 _x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash.h new file mode 100644 index 00000000..725affa3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash_shake256_simple.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash_shake256_simple.c new file mode 100644 index 00000000..11ae958b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash_shake256_simplex4.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash_shake256_simplex4.c new file mode 100644 index 00000000..0649592d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thash_shake256_simplex4.c @@ -0,0 +1,47 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thashx4.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thashx4_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thashx4_##name( \ + unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, \ + const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, \ + const unsigned char *in3, const unsigned char *pub_seed, uint32_t addrx4[4 * 8], \ + const hash_state *state_seeded) { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; \ + \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(buf0 + 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, addrx4 + 0 * 8); \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, addrx4 + 1 * 8); \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, addrx4 + 2 * 8); \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, addrx4 + 3 * 8); \ + memcpy(buf0 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES, in0, (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + memcpy(buf1 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES, in1, (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + memcpy(buf2 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES, in2, (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + memcpy(buf3 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES, in3, (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_shake256x4(out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, buf0, buf1, buf2, buf3, \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + \ + /* Avoid unused parameter warning */ \ + (void)state_seeded; \ + } + +thashx4_variant(1, 1) +thashx4_variant(2, 2) +thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN) +thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thashx4.h new file mode 100644 index 00000000..87f6d3eb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef 
/**
 * Converts the value of 'in' to 'outlen' bytes in big-endian byte order.
 *
 * The least significant byte of 'in' ends up in out[outlen - 1]. If 'outlen'
 * is smaller than the width of 'in', the high-order bytes are dropped; if it
 * is larger, the leading bytes of 'out' are zero.
 */
void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ull_to_bytes(
    unsigned char *out, size_t outlen, unsigned long long in) {

    /* Fill 'out' from its last byte backwards, for big-endianness. */
    size_t pos = outlen;
    while (pos > 0) {
        pos--;
        out[pos] = (unsigned char)(in & 0xff);
        in >>= 8;
    }
}

/**
 * Converts the 'inlen' bytes in 'in' from big-endian byte order to an integer.
 *
 * Returns 0 for inlen == 0. If 'inlen' exceeds the width of
 * unsigned long long, only the trailing (least significant) bytes contribute
 * to the result.
 */
unsigned long long PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_bytes_to_ull(
    const unsigned char *in, size_t inlen) {
    unsigned long long retval = 0;

    /* Shift-and-or accumulation: the shift count is always 8, so the
       operation stays well defined for any 'inlen' (a per-byte shift of
       8 * (inlen - 1 - i) would be undefined once it reaches the type width). */
    for (size_t i = 0; i < inlen; i++) {
        retval = (retval << 8) | (unsigned long long)in[i];
    }
    return retval;
}
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utils.h new file mode 100644 index 00000000..77d4f9f4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utilsx4.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utilsx4.c new file mode 100644 index 00000000..e8850f4a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utilsx4.h new file mode 100644 index 00000000..ef7b5202 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/wots.c new file mode 100644 index 00000000..f53be133 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N]; + + for (j = 0; j < 4; j++) { /* replicate addr into each of the four 8-word lane addresses */ + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN) { /* copy out only chains that exist; padding lanes stay in pkbuf */ + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N); + } + } + } + + // No-op cast: state_seeded is already used in the loop above. + (void)state_seeded; +} + +/** + * Takes a n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); /* in place on sig + i * N: start 0, lengths[i] steps */ + } + + // no-op cast: state_seeded is already used in the loop above + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * Chain i is continued from position lengths[i] (derived from msg by chain_lengths) for WOTS_W - 1 - lengths[i] more steps. + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // no-op cast: state_seeded is already used in the loop above + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/wots.h new file mode 100644 index 00000000..3433af11 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..91f5e2c0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192f-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/address.c new file mode 100644 index 00000000..cce899fe --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/address.h new file mode 100644 index 00000000..f57ffb75 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/api.h new file mode 100644 index 00000000..48e622db --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_BYTES 35664 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/fors.c new file mode 100644 index 00000000..bc6f7f50 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. Bits are read least-significant first within each byte, and the first bit read for an index becomes its bit 0. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; /* bit (offset & 7) of byte (offset >> 3) goes to bit j */ + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES bits.
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/fors.h new file mode 100644 index 00000000..a2648bea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash.h new file mode 100644 index 00000000..bbd5ebdb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash_shake256.c new file mode 100644 index 00000000..81d055a5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/params.h new file mode 100644 index 00000000..292f83c4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FULL_HEIGHT 66 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D 22 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT 8 +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES 33 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/sign.c new file mode 100644 index 00000000..7efb32f9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/thash.h new file mode 100644 index 00000000..b090f26a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/thash_shake256_simple.c 
b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/thash_shake256_simple.c new file mode 100644 index 00000000..8ed51d81 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/utils.c new file mode 100644 index 00000000..82ae4b3f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. 
+ * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_FORS_HEIGHT, 
gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/utils.h new file mode 100644 index 00000000..66b280a3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. 
+ * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git 
a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/wots.c new file mode 100644 index 00000000..40a615ea --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. 
*/ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/wots.h new file mode 100644 index 00000000..5627cc93 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192f-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_H +#define 
PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/META.yml b/crypto_sign/sphincs/sphincs-shake256-192s-robust/META.yml new file mode 100644 index 00000000..b58bbab8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 17064 +testvectors-sha256: 4f80c9cf98c017293c7543f96170f18655e6ef65675300aa302de42562b21f5a +nistkat-sha256: 619ce596575f52ed8fd3e5b0501db21985e505c95f0f595faa4d6a6f0a2fd81c +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..fdc95802 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192s-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj hash_shake256x4.obj thash_shake256_robustx4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/address.c new file mode 100644 index 00000000..64708b6f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/address.h new file mode 100644 index 00000000..75c55796 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h new file mode 100644 index 00000000..25a193df --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fips202x4.c new file mode 100644 index 00000000..8f33cfaf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include <immintrin.h> +#include <stdint.h> +#include <string.h> + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { /* reads x[0..7] as a little-endian 64-bit word */ + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { /* writes u to x[0..7], least-significant byte first */ + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds +/* Absorbs four messages m0..m3 of mlen bytes each into the 4-way state s at rate r (bytes, at most 200), then xors in the padded last block using suffix byte p. No permutation is applied after the last block; keccak_squeezeblocks4x permutes before it outputs. */ +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; /* ss[4 * i + k] is 64-bit lane i of message k */ + + /* Absorb every full r-byte block. */ + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); /* the remaining mlen < r bytes go into the zeroed scratch blocks */ + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; /* suffix byte directly after the message tail */ + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p;
+ + t0[r - 1] |= 128; /* closing padding bit in the last byte of the rate */ + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + +/* Writes nblocks blocks of r bytes to each of h0..h3; the state is permuted before every block is read out. */ +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + +/* Four-way SHAKE128: hashes in0..in3 (inlen bytes each) and writes outlen bytes to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state; s is uninitialized here, so it is written outright rather than xor-ed with itself */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + +/* Four-way SHAKE256: hashes in0..in3 (inlen bytes each) and writes outlen bytes to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state; s is uninitialized here, so it is written outright rather than xor-ed with itself */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fips202x4.h new file mode 100644 index 00000000..409f9f0c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fors.c new file mode 100644 index 00000000..8411b93e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { /* Computes the four FORS leaves with tree indices addr_idx0..addr_idx3 into leaf0..leaf3. */ + uint32_t fors_leaf_addrx4[4 * 8] = {0}; /* four consecutive 8-word addresses, one per lane */ + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + /* Each leafN buffer first receives the generated secret value and is then hashed in place into the leaf. */ + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES integers.
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fors.h new file mode 100644 index 00000000..94c8368d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash.h new file mode 100644 index 00000000..6d13822c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_shake256.c new file mode 100644 index 00000000..b7390a6e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_shake256x4.c new file mode 100644 index 00000000..be83a64a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_state.h new file mode 100644 index 00000000..fcb026dd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hashx4.h new file mode 100644 index 00000000..38724c72 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/params.h new file mode 100644 index 00000000..3a81fd64 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES 14 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* Size of a serialized address in bytes; named for clarity. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N / PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/sign.c new file mode 100644 index 00000000..2de69924 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash.h new file mode 100644 index 00000000..4a41e341 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash_shake256_robust.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash_shake256_robust.c new file mode 100644 index 00000000..812e78d0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash_shake256_robustx4.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash_shake256_robustx4.c new file mode 100644 index 00000000..a5f3845d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thash_shake256_robustx4.c @@ -0,0 +1,66 @@ +#include +#include + +#include "thashx4.h" +#include "address.h" +#include "params.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thash_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thashx4_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) \ + { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char bitmask0[(inblocks) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char bitmask1[(inblocks) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char bitmask2[(inblocks) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned char bitmask3[(inblocks) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned int i; \ 
+ \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(buf0 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, addrx4 + 0*8); \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, addrx4 + 1*8); \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, addrx4 + 2*8); \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, addrx4 + 3*8); \ + \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES); \ + \ + for (i = 0; i < (inblocks) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; i++) { \ + buf0[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \ + buf1[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \ + buf2[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \ + buf3[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \ + } \ + \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake256x4( \ + out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + \ + /* avoid unused parameter 
warning */ \ + (void)state_seeded; \ + } + + +thash_variant(1, 1) +thash_variant(2, 2) +thash_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN) +thash_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thashx4.h new file mode 100644 index 00000000..5f4e6cb1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utils.c new file mode 100644 index 00000000..228b7164 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + 
unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utils.h new file mode 100644 index 00000000..4e3bcfa4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utilsx4.c 
b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utilsx4.c new file mode 100644 index 00000000..34b81f3c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utilsx4.h new file mode 100644 index 00000000..3fa10835 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/wots.c new file mode 100644 index 00000000..d0a51224 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N); + } + } + } + + // Get rid of unused argument variable. + (void)state_seeded; +} + +/** + * Takes a n-byte message and the 32-byte sk_see to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/wots.h new file mode 100644 index 00000000..40c0093c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..3dd9feae --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192s-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/address.c new file mode 100644 index 00000000..1d6afdd8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/address.h new file mode 100644 index 00000000..2d3ec40c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/api.h new file mode 100644 index 00000000..d7b4657b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Writes a detached signature to sig and its length to *siglen. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/fors.c new file mode 100644 index 00000000..2873041d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/fors.h new file mode 100644 index 00000000..55c308a5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash.h new file mode 100644 index 00000000..1fb4ccb2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash_shake256.c new file mode 100644 index 00000000..facef0b4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/params.h new file mode 100644 index 00000000..99ef148c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES 14 +/* Winternitz parameter w. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* Length in bytes of a serialized address (8 words of 4 bytes each). */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N / PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/sign.c new file mode 100644 index 00000000..cbd71edd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given
a seed of length PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SEEDBYTES. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree, written into the root field of sk. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/thash.h new file mode 100644 index 00000000..b278d6ac --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/thash_shake256_robust.c 
b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/thash_shake256_robust.c new file mode 100644 index 00000000..bab42bf6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/utils.c new file mode 100644 index 00000000..958b12f1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/utils.h new file mode 100644 index 00000000..b4079184 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/wots.c new file mode 100644 index 00000000..5ec0410c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? 
+ +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. 
+ */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over the WOTS_LEN1 base_w digits in msg_base_w and writes it as WOTS_LEN2 base_w digits to csum_base_w. */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 makes the shift 0 instead of 8 when WOTS_LEN2 * WOTS_LOGW is a multiple of 8. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the n-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/wots.h new file mode 100644 index 00000000..19bcd4a7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/META.yml b/crypto_sign/sphincs/sphincs-shake256-192s-simple/META.yml new file mode 100644 index 00000000..1b65dbdd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 3 +length-public-key: 48 +length-secret-key: 96 +length-signature: 17064 +testvectors-sha256: ea1c38dafdeec8bd6b5a844955b1edffbb1d16f392a647fdae8e6dd148c6396c +nistkat-sha256: 31b341c25230f8524e123db8a5dc29e8dd952cd11a63a821ac488b97d5106597 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M.
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..2dd3cad1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192s-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj hash_shake256x4.obj thash_shake256_simplex4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/address.c new file mode 100644 index 00000000..f90c6265 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/address.h new file mode 100644 index 00000000..47ef7415 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h new file mode 100644 index 00000000..0f6cfe1c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fips202x4.c new file mode 100644 index 00000000..b34d28c6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state explicitly; XORing the uninitialized array with itself would read indeterminate values */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state explicitly; XORing the uninitialized array with itself would read indeterminate values */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fips202x4.h new file mode 100644 index 00000000..1fe7acd5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fors.c new file mode 100644 index 00000000..a1ba0624 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fors.h new file mode 100644 index 00000000..3addc74a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash.h new file mode 100644 index 00000000..cc890f06 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_shake256.c new file mode 100644 index 00000000..2ff61c06 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_shake256x4.c new file mode 100644 index 00000000..d2c94a8e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_state.h new file mode 100644 index 00000000..89a53cf5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hashx4.h new file mode 100644 index 00000000..18a74575 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/params.h new file mode 100644 index 00000000..3e333766 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES 14 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/sign.c new file mode 100644 index 00000000..66b38e5d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES. +
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash.h new file mode 100644 index 00000000..ae7cb25d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash_shake256_simple.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash_shake256_simple.c new file mode 100644 index 00000000..1d9fef8d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash_shake256_simplex4.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash_shake256_simplex4.c new file mode 100644 index 00000000..93ff168a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thash_shake256_simplex4.c @@ -0,0 +1,47 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thashx4.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thashx4_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thashx4_##name( \ + unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, \ + const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, \ + const unsigned char *in3, const unsigned char *pub_seed, uint32_t addrx4[4 * 8], \ + const hash_state *state_seeded) { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; \ + \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(buf0 + 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, addrx4 + 0 * 8); \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, addrx4 + 1 * 8); \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, addrx4 + 2 * 8); \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, addrx4 + 3 * 8); \ + memcpy(buf0 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES, in0, (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + memcpy(buf1 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES, in1, (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + memcpy(buf2 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES, in2, (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + memcpy(buf3 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES, in3, (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_shake256x4(out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, buf0, buf1, buf2, buf3, \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + \ + /* Avoid unused parameter warning */ \ + (void)state_seeded; \ + } + +thashx4_variant(1, 1) +thashx4_variant(2, 2) +thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN) +thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thashx4.h new file mode 100644 index 00000000..bcf59211 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utils.c new file mode 100644 index 00000000..8b7d840f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utils.h new file mode 100644 index 00000000..8ffa2d45 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utilsx4.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utilsx4.c new file mode 100644 index 00000000..29a95a53 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utilsx4.h new file mode 100644 index 00000000..70d9a024 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/wots.c new file mode 100644 index 00000000..cca39822 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N); + } + } + } + + // Get rid of unused argument variable. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/wots.h new file mode 100644 index 00000000..117c39a0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include <stdint.h> + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..181e5150 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-192s-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/address.c new file mode 100644 index 00000000..1a3092c0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/address.h new file mode 100644 index 00000000..af5b119e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDRESS_H + +#include <stdint.h> + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8],
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/api.h new file mode 100644 index 00000000..102f6ae6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 96 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 48 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_BYTES 17064 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES 72 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/fors.c new file mode 100644 index 00000000..f4bacdc6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/fors.h new file mode 100644 index 00000000..108620f7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash.h new file mode 100644 index 00000000..5f1390ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include <stddef.h> +#include <stdint.h> + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash_shake256.c new file mode 100644 index 00000000..26c5d912 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/params.h new file mode 100644 index 00000000..bb6f8785 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N 24 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT 16 +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES 14 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/sign.c new file mode 100644 index 00000000..52217961 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/thash.h new file mode 100644 index 00000000..1788df02 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/thash_shake256_simple.c 
b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/thash_shake256_simple.c new file mode 100644 index 00000000..fcb8dcc7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/utils.c new file mode 100644 index 00000000..d88090ba --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. 
+ * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_FORS_HEIGHT, 
gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/utils.h new file mode 100644 index 00000000..14df17c9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. 
+ * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git 
a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/wots.c new file mode 100644 index 00000000..e4782279 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. 
*/ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/wots.h new file mode 100644 index 00000000..3c279306 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-192s-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_H +#define 
PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256192SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/META.yml b/crypto_sign/sphincs/sphincs-shake256-256f-robust/META.yml new file mode 100644 index 00000000..75cf7f25 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 49216 +testvectors-sha256: 4757a2ce7aec6daac4ab894336586949f7919c63d55200ec6325eb395efcf1ef +nistkat-sha256: d5410edbaa120cf24f0bcf8cb834fdb08b4b5652809ee17c026d37212f4a4934 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M.
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..1e3e7283 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256f-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj hash_shake256x4.obj thash_shake256_robustx4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/address.c new file mode 100644 index 00000000..de994715 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/address.h new file mode 100644 index 00000000..e95e579a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h new file mode 100644 index 00000000..8689745d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fips202x4.c new file mode 100644 index 00000000..e1a01a9c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state; setzero avoids reading the still-uninitialized s[] */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state; setzero avoids reading the still-uninitialized s[] */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fips202x4.h new file mode 100644 index 00000000..dbe65ccd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fors.c new file mode 100644 index 00000000..5bbcea82 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fors.h new file mode 100644 index 00000000..58a0256a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash.h new file mode 100644 index 00000000..eacdda3a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_shake256.c new file mode 100644 index 00000000..4e1c7491 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_shake256x4.c new file mode 100644 index 00000000..7a86225b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_state.h new file mode 100644 index 00000000..07c2c81f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hashx4.h new file mode 100644 index 00000000..568a089f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/params.h new file mode 100644 index 00000000..534826ca --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FULL_HEIGHT 68 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES 30 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters.
*/ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N / PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/sign.c new file mode 100644 index 00000000..d558b6ca --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */
+static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed,
+                          const unsigned char *pub_seed,
+                          uint32_t addr_idx, const uint32_t tree_addr[8],
+                          const hash_state *hash_state_seeded) {
+    /* keypair_addr addresses the WOTS key pair; leaf_addr addresses the hash
+       that compresses its public key into the leaf. */
+    uint32_t keypair_addr[8] = {0};
+    uint32_t leaf_addr[8] = {0};
+    unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES];
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        keypair_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTS);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        leaf_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTSPK);
+
+    /* Generate the WOTS public key for key pair addr_idx of the given subtree. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_subtree_addr(
+        keypair_addr, tree_addr);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr(
+        keypair_addr, addr_idx);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_gen_pk(
+        wots_pk, sk_seed, pub_seed, keypair_addr, hash_state_seeded);
+
+    /* Hash the whole public key into the leaf value. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(
+        leaf_addr, keypair_addr);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_WOTS_LEN(
+        leaf, wots_pk, pub_seed, leaf_addr, hash_state_seeded);
+}
+
+/*
+ * Size of a secret key, in bytes.
+ */
+size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_secretkeybytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES;
+}
+
+/*
+ * Size of a public key, in bytes.
+ */
+size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_publickeybytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES;
+}
+
+/*
+ * Size of a signature, in bytes.
+ */
+size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_bytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_BYTES;
+}
+
+/*
+ * Size of the seed consumed when generating a key pair, in bytes.
+ */
+size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_seedbytes(void) {
+    return PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES;
+}
+
+/*
+ * Generates an SPX key pair given a seed of length
+ * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES.
+
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]
+ * Format pk: [PUB_SEED || root]
+ */
+int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_seed_keypair(
+    uint8_t *pk, uint8_t *sk, const uint8_t *seed) {
+    /* Where PUB_SEED and root live inside sk. */
+    uint8_t *const sk_pub_seed = sk + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+    uint8_t *const sk_root = sk + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+    /* Key generation only needs the root, but the shared treehash routine
+       always produces an authentication path too, so give it scratch space. */
+    unsigned char unused_auth_path[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    uint32_t top_tree_addr[8] = {0};
+    hash_state hash_state_seeded;
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_layer_addr(
+        top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D - 1);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE);
+
+    /* SK_SEED, SK_PRF and PUB_SEED are taken verbatim from the seed. */
+    memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES);
+
+    /* The public key starts with PUB_SEED. */
+    memcpy(pk, sk_pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N);
+
+    /* Let the hash function instantiation prepare whatever state it needs,
+       based on the public seed. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk);
+
+    /* The root of the top-most subtree is written straight into sk. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash_TREE_HEIGHT(
+        sk_root, unused_auth_path, sk, sk_pub_seed, 0, 0,
+        wots_gen_leaf, top_tree_addr, &hash_state_seeded);
+
+    /* The public key ends with the same root. */
+    memcpy(pk + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sk_root, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N);
+
+    return 0;
+}
+
+/*
+ * Generates an SPX key pair.
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root]
+ * Format pk: [PUB_SEED || root]
+ */
+int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_keypair(
+    uint8_t *pk, uint8_t *sk) {
+
+    /* Staging buffers. The __m128 array member gives each union the
+       alignment of __m128, whatever the alignment of the caller's pk/sk. */
+    union {
+        __m128 _align[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16];
+        uint8_t bytes[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES];
+    } pk_buf;
+
+    union {
+        __m128 _align[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16];
+        uint8_t bytes[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES];
+    } sk_buf;
+
+    union {
+        __m128 _align[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES / 16];
+        uint8_t bytes[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES];
+    } seed_buf;
+
+    /* Draw a random seed and derive the key pair from it. */
+    randombytes(seed_buf.bytes, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SEEDBYTES);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_seed_keypair(
+        pk_buf.bytes, sk_buf.bytes, seed_buf.bytes);
+
+    /* Copy the result out to the caller's buffers. */
+    memcpy(pk, pk_buf.bytes, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES);
+    memcpy(sk, sk_buf.bytes, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES);
+
+    return 0;
+}
+
+/**
+ * Returns an array containing a detached signature.
+ */
+/*
+ * Writes PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES bytes to sig, stores
+ * that length in *siglen and returns 0.
+ * sk is read as [SK_SEED || SK_PRF || PUB_SEED || root].
+ * The secret key and the signature under construction are staged in local
+ * unions whose __m128 array member gives them the alignment of __m128, so
+ * the caller's sk and sig may have any alignment.
+ */
+int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_signature(
+    uint8_t *sig, size_t *siglen,
+    const uint8_t *m, size_t mlen, const uint8_t *sk) {
+    // guarantee alignment of sk
+    // (the member must be an __m128 array: a pointer to __m128 would only
+    // give the union the alignment of a pointer)
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16];
+        uint8_t sk[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES];
+    } aligned_sk;
+    memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_SECRETKEYBYTES);
+    sk = aligned_sk.sk;
+
+    // guarantee alignment of sig
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES / 16];
+        uint8_t sig[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES];
+    } aligned_sig;
+    /* From here on sig walks through the staging buffer; the finished
+       signature is copied to orig_sig at the end. */
+    uint8_t *orig_sig = sig;
+    sig = aligned_sig.sig;
+
+    const unsigned char *sk_seed = sk;
+    const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+    const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+    const unsigned char *pub_seed = pk;
+
+    unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_MSG_BYTES];
+    unsigned char root[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    uint32_t i;
+    uint64_t tree;
+    uint32_t idx_leaf;
+    uint32_t wots_addr[8] = {0};
+    uint32_t tree_addr[8] = {0};
+
+    hash_state hash_state_seeded;
+
+    /* This hook allows the hash function instantiation to do whatever
+       preparation or computation it needs, based on the public seed. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_initialize_hash_function(
+        &hash_state_seeded,
+        pub_seed, sk_seed);
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        wots_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTS);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE);
+
+    /* Optionally, signing can be made non-deterministic using optrand.
+       This can help counter side-channel attacks that would benefit from
+       getting a large number of traces when the signer uses the same nodes. */
+    randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N);
+    /* Compute the digest randomization value; it forms the first N bytes of
+       the signature. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_gen_message_random(
+        sig, sk_prf, optrand, m, mlen, &hash_state_seeded);
+
+    /* Derive the message digest and leaf index from R, PK and M. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_hash_message(
+        mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_addr(wots_addr, tree);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr(
+        wots_addr, idx_leaf);
+
+    /* Sign the message hash using FORS. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_fors_sign(
+        sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_BYTES;
+
+    /* One WOTS signature plus authentication path per hypertree layer; root
+       is the value signed at each layer and is replaced by that layer's
+       subtree root for the next one. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D; i++) {
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_layer_addr(tree_addr, i);
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_addr(tree_addr, tree);
+
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_subtree_addr(
+            wots_addr, tree_addr);
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr(
+            wots_addr, idx_leaf);
+
+        /* Compute a WOTS signature. */
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_sign(
+            sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES;
+
+        /* Compute the authentication path for the used WOTS leaf. */
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash_TREE_HEIGHT(
+            root, sig, sk_seed, pub_seed, idx_leaf, 0,
+            wots_gen_leaf, tree_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+
+        /* Update the indices for the next layer. */
+        idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT) - 1));
+        tree = tree >> PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT;
+    }
+
+    memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES);
+    *siglen = PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES;
+
+    return 0;
+}
+
+/**
+ * Verifies a detached signature and message under a given public key.
+ * pk is read as [PUB_SEED || root].
+ * Returns 0 if the root recomputed from the signature equals the root in
+ * pk; returns -1 if siglen is not
+ * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES or the roots differ.
+ */
+int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_verify(
+    const uint8_t *sig, size_t siglen,
+    const uint8_t *m, size_t mlen, const uint8_t *pk) {
+    // guarantee alignment of pk
+    // (an __m128 array, not a pointer, so the union gets the alignment of
+    // __m128)
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16];
+        uint8_t pk[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES];
+    } aligned_pk;
+    memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES);
+    pk = aligned_pk.pk;
+
+    const unsigned char *pub_seed = pk;
+    const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+    unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_MSG_BYTES];
+    unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES];
+    unsigned char root[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    unsigned int i;
+    uint64_t tree;
+    uint32_t idx_leaf;
+    uint32_t wots_addr[8] = {0};
+    uint32_t tree_addr[8] = {0};
+    uint32_t wots_pk_addr[8] = {0};
+
+    hash_state hash_state_seeded;
+
+    if (siglen != PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES) {
+        return -1;
+    }
+
+    /* This hook allows the hash function instantiation to do whatever
+       preparation or computation it needs, based on the public seed. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_initialize_hash_function(
+        &hash_state_seeded,
+        pub_seed, NULL);
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        wots_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTS);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_type(
+        wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_WOTSPK);
+
+    /* Derive the message digest and leaf index from R || PK || M. */
+    /* The additional PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N is a result of the hash domain separator. */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_hash_message(
+        mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+
+    /* Layer correctly defaults to 0, so no need to set_layer_addr */
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_addr(wots_addr, tree);
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr(
+        wots_addr, idx_leaf);
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_fors_pk_from_sig(
+        root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded);
+    sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_BYTES;
+
+    /* For each subtree.. */
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_D; i++) {
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_layer_addr(tree_addr, i);
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_addr(tree_addr, tree);
+
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_subtree_addr(
+            wots_addr, tree_addr);
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_keypair_addr(
+            wots_addr, idx_leaf);
+
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_copy_keypair_addr(
+            wots_pk_addr, wots_addr);
+
+        /* The WOTS public key is only correct if the signature was correct. */
+        /* Initially, root is the FORS pk, but on subsequent iterations it is
+           the root of the subtree below the currently processed subtree. */
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_pk_from_sig(
+            wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_BYTES;
+
+        /* Compute the leaf node using the WOTS public key. */
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_WOTS_LEN(
+            leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded);
+
+        /* Compute the root node of this subtree. */
+        PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_compute_root(
+            root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT,
+            pub_seed, tree_addr, &hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N;
+
+        /* Update the indices for the next layer. */
+        idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT) - 1));
+        tree = tree >> PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT;
+    }
+
+    /* Check if the root node equals the root node in the public key. */
+    if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Returns an array containing the signature followed by the message.
+ * sm receives the signature and then mlen message bytes; *smlen is set to
+ * the signature length plus mlen. Always returns 0.
+ * The signature is written to sm before the message is moved behind it, so
+ * m must not overlap the signature area at the start of sm.
+ */
+int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign(
+    uint8_t *sm, size_t *smlen,
+    const uint8_t *m, size_t mlen, const uint8_t *sk) {
+    size_t siglen;
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_signature(
+        sm, &siglen, m, mlen, sk);
+
+    memmove(sm + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES, m, mlen);
+    *smlen = siglen + mlen;
+
+    return 0;
+}
+
+/**
+ * Verifies a given signature-message pair under a given public key.
+ */
+/*
+ * sm is read as a signature of PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES
+ * bytes followed by the message.
+ * On success the message is moved to m, *mlen is set to its length and 0 is
+ * returned. If smlen is shorter than a signature or verification fails,
+ * smlen bytes of m are zeroed, *mlen is set to 0 and -1 is returned.
+ */
+int PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_open(
+    uint8_t *m, size_t *mlen,
+    const uint8_t *sm, size_t smlen, const uint8_t *pk) {
+
+    // guarantee alignment of pk
+    // (an __m128 array, not a pointer, so the union gets the alignment of
+    // __m128)
+    union {
+        __m128 _x[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16];
+        uint8_t pk[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES];
+    } aligned_pk;
+    memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_CRYPTO_PUBLICKEYBYTES);
+    pk = aligned_pk.pk;
+
+
+    /* The API caller does not necessarily know what size a signature should be
+       but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES. */
+    if (smlen < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES) {
+        memset(m, 0, smlen);
+        *mlen = 0;
+        return -1;
+    }
+
+    *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES;
+
+    if (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_crypto_sign_verify(
+                sm, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES, *mlen, pk)) {
+        memset(m, 0, smlen);
+        *mlen = 0;
+        return -1;
+    }
+
+    /* If verification was successful, move the message to the right place. */
+    memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_BYTES, *mlen);
+
+    return 0;
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash.h
new file mode 100644
index 00000000..6207bfba
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash.h
@@ -0,0 +1,28 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_THASH_H
+#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_THASH_H
+
+#include "hash_state.h"
+
+#include
+
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_1(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_2(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_WOTS_LEN(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_FORS_TREES(
+    unsigned char *out, const unsigned char *in,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+#endif
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash_shake256_robust.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash_shake256_robust.c
new file mode 100644
index 00000000..79c9d5d7
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash_shake256_robust.c
@@ -0,0 +1,81 @@
+#include
+#include
+
+#include "address.h"
+#include "params.h"
+#include "thash.h"
+
+#include "fips202.h"
+
+/**
+ * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N bytes.
+ */ +static void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash_shake256_robustx4.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash_shake256_robustx4.c new file mode 100644 index 00000000..c51f9c26 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thash_shake256_robustx4.c @@ -0,0 +1,66 @@ +#include +#include + +#include "thashx4.h" +#include "address.h" +#include "params.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thash_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thashx4_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) \ + { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char bitmask0[(inblocks) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char bitmask1[(inblocks) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char bitmask2[(inblocks) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned char bitmask3[(inblocks) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned int i; \ 
+ \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(buf0 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, addrx4 + 0*8); \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, addrx4 + 1*8); \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, addrx4 + 2*8); \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, addrx4 + 3*8); \ + \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES); \ + \ + for (i = 0; i < (inblocks) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N; i++) { \ + buf0[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \ + buf1[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \ + buf2[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \ + buf3[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \ + } \ + \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_shake256x4( \ + out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + \ + /* avoid unused parameter 
warning */ \ + (void)state_seeded; \ + } + + +thash_variant(1, 1) +thash_variant(2, 2) +thash_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN) +thash_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thashx4.h new file mode 100644 index 00000000..c4964338 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utils.c new file mode 100644 index 00000000..a2833551 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */
+static void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash(
+    unsigned char *root, unsigned char *auth_path,
+    unsigned char *stack, unsigned int *heights,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height,
+    void (*gen_leaf)(
+        unsigned char * /* leaf */,
+        const unsigned char * /* sk_seed */,
+        const unsigned char * /* pub_seed */,
+        uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */,
+        const hash_state * /* hash_state_seeded */),
+    uint32_t tree_addr[8],
+    const hash_state *hash_state_seeded) {
+
+    unsigned int offset = 0; /* number of nodes currently on the stack */
+    uint32_t idx; /* index of the leaf generated in this iteration */
+    uint32_t tree_idx; /* index, within its layer, of a freshly merged node */
+
+    for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) {
+        /* Add the next leaf node to the stack. It is generated for index
+           idx + idx_offset and recorded with height 0. */
+        gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N,
+                 sk_seed, pub_seed, idx + idx_offset, tree_addr,
+                 hash_state_seeded);
+        offset++;
+        heights[offset - 1] = 0;
+
+        /* If this is a node we need for the auth path.. (the sibling of
+           leaf_idx, stored as the first auth path entry) */
+        if ((leaf_idx ^ 0x1) == idx) {
+            memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N);
+        }
+
+        /* While the top-most nodes are of equal height.. (merge them into
+           their parent, which may in turn match the node below it) */
+        while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) {
+            /* Compute index of the new node, in the next layer. */
+            tree_idx = (idx >> (heights[offset - 1] + 1));
+
+            /* Set the address of the node we're creating. idx_offset is
+               scaled down to the new node's layer in the same way as idx. */
+            PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height(
+                tree_addr, heights[offset - 1] + 1);
+            PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(
+                tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1)));
+            /* Hash the top-most nodes from the stack together. The result
+               overwrites the lower of the two stack slots. */
+            PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_2(
+                stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N,
+                pub_seed, tree_addr, hash_state_seeded);
+            offset--;
+            /* Note that the top-most node is now one layer higher. */
+            heights[offset - 1]++;
+
+            /* If this is a node we need for the auth path.. (the sibling of
+               leaf_idx's ancestor at this height; it is stored at the auth
+               path entry for that height) */
+            if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) {
+                memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N,
+                       stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N);
+            }
+        }
+    }
+    memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); /* the bottom stack slot is returned as the root */
+}
+
+/* The wrappers below ensure that we use fixed-size buffers on the stack:
+   each one provides room for (height + 1) nodes of
+   PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N bytes and (height + 1) height
+   entries, then calls the generic routine with that height. */
+
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash_FORS_HEIGHT(
+    unsigned char *root, unsigned char *auth_path,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t leaf_idx, uint32_t idx_offset,
+    void (*gen_leaf)(
+        unsigned char * /* leaf */,
+        const unsigned char * /* sk_seed */,
+        const unsigned char * /* pub_seed */,
+        uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */,
+        const hash_state * /* hash_state_seeded */),
+    uint32_t tree_addr[8], const hash_state *hash_state_seeded) {
+
+    unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT + 1];
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash(
+        root, auth_path, stack, heights, sk_seed, pub_seed,
+        leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded);
+}
+
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash_TREE_HEIGHT(
+    unsigned char *root, unsigned char *auth_path,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t leaf_idx, uint32_t idx_offset,
+    void (*gen_leaf)(
+        unsigned char * /* leaf */,
+        const unsigned char * /* sk_seed */,
+        const unsigned char * /* pub_seed */,
+        uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */,
+        const hash_state * /* hash_state_seeded */),
+    uint32_t tree_addr[8], const hash_state *hash_state_seeded) {
+
+    unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N];
+    unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT + 1];
+
+    PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash(
+        root, auth_path, stack, heights, sk_seed, pub_seed,
+        leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded);
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utils.h
new file mode 100644
index 00000000..cf6288e7
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utils.h
@@ -0,0 +1,64 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_UTILS_H
+#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_UTILS_H
+
+#include "hash_state.h"
+#include "params.h"
+#include
+#include
+
+/**
+ * Converts the value of 'in' to 'outlen' bytes in big-endian byte order.
+ */
+void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ull_to_bytes(
+    unsigned char *out, size_t outlen, unsigned long long in);
+
+/**
+ * Converts the inlen bytes in 'in' from big-endian byte order to an integer.
+ */
+unsigned long long PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_bytes_to_ull(
+    const unsigned char *in, size_t inlen);
+
+/**
+ * Computes a root node given a leaf and an auth path.
+ * Expects address to be complete other than the tree_height and tree_index.
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utilsx4.c 
b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utilsx4.c new file mode 100644 index 00000000..2198ac1a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utilsx4.h new file mode 100644 index 00000000..26107b89 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/wots.c new file mode 100644 index 00000000..71e4dfed --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N); + } + } + } + + // Get rid of unused argument variable. + (void)state_seeded; +} + +/** + * Takes a n-byte message and the 32-byte sk_see to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/wots.h new file mode 100644 index 00000000..a3204a9a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..b92851bd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256f-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/address.c new file mode 100644 index 00000000..1cda8cdb --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/address.h new file mode 100644 index 00000000..14eea7ab --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/api.h new file mode 100644 index 00000000..6387b054 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/fors.c new file mode 100644 index 00000000..ccab08c8 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/fors.h new file mode 100644 index 00000000..07edc714 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash.h new file mode 100644 index 00000000..7d8a2303 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash_shake256.c new file mode 100644 index 00000000..750baedc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/params.h new file mode 100644 index 00000000..15ce3dad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FULL_HEIGHT 68 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES 30 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N / PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/sign.c new file mode 100644 index 00000000..0cff0a1c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/thash.h new file mode 100644 index 00000000..823074b1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/thash_shake256_robust.c 
b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/thash_shake256_robust.c new file mode 100644 index 00000000..0a89b37a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/utils.c new file mode 100644 index 00000000..59d8ead5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/utils.h new file mode 100644 index 00000000..9eacb4cd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/wots.c new file mode 100644 index 00000000..c5a0ca21 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? 
+ +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. 
+ */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/wots.h new file mode 100644 index 00000000..7468f387 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-robust/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/META.yml b/crypto_sign/sphincs/sphincs-shake256-256f-simple/META.yml new file mode 100644 index 00000000..9280230d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 49216 +testvectors-sha256: 1b261fc7394dc847349c07bde922ac028aad94c534f51341f8202670558ed27a +nistkat-sha256: 5a8959fc0436a66d6d69cc8adb2f24936b763ae324bc97ed139ae92f9f7e03c3 +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein, + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..ca4d7574 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256f-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj hash_shake256x4.obj thash_shake256_simplex4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/address.c new file mode 100644 index 00000000..56c88ce9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/address.h new file mode 100644 index 00000000..530575d2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h new file mode 100644 index 00000000..007dfef2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fips202x4.c new file mode 100644 index 00000000..3cfeadcf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + +/* Squeeze nblocks blocks of r bytes per lane: permute the 4-way state, then store lane j of each word to hj. */ +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + +/* 4-way SHAKE128: absorbs in0..in3 (inlen bytes each) and writes outlen bytes to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state; setzero avoids reading the still-uninitialized s[] */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + +/* 4-way SHAKE256: absorbs in0..in3 (inlen bytes each) and writes outlen bytes to each of out0..out3. */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state; setzero avoids reading the still-uninitialized s[] */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fips202x4.h new file mode 100644 index 00000000..e628d87f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, +
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fors.c new file mode 100644 index 00000000..4621bef6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fors.h new file mode 100644 index 00000000..6159e0f1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash.h new file mode 100644 index 00000000..5c46abac --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_shake256.c new file mode 100644 index 00000000..58e27a87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_shake256x4.c new file mode 100644 index 00000000..1ae12df3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_state.h new file mode 100644 index 00000000..180de807 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hashx4.h new file mode 100644 index 00000000..557f1131 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/params.h new file mode 100644 index 00000000..9dba4c5d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FULL_HEIGHT 68 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/sign.c new file mode 100644 index 00000000..7cc68e7c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee alignment of sk + union { + __m128 *_x; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee alignment of sig + union { + __m128 *_x; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee alignment of pk + union { + __m128 *_x; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash.h new file mode 100644 index 00000000..cadd0a4e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash_shake256_simple.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash_shake256_simple.c new file mode 100644 index 00000000..59987e6d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash_shake256_simplex4.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash_shake256_simplex4.c new file mode 100644 index 00000000..43358518 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thash_shake256_simplex4.c @@ -0,0 +1,47 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thashx4.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thashx4_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thashx4_##name( \ + unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, \ + const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, \ + const unsigned char *in3, const unsigned char *pub_seed, uint32_t addrx4[4 * 8], \ + const hash_state *state_seeded) { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; \ + \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(buf0 + 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, addrx4 + 0 * 8); \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, addrx4 + 1 * 8); \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, addrx4 + 2 * 8); \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, addrx4 + 3 * 8); \ + memcpy(buf0 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES, in0, (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + memcpy(buf1 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES, in1, (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + memcpy(buf2 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES, in2, (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + memcpy(buf3 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES, in3, (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_shake256x4(out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, buf0, buf1, buf2, buf3, \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + \ + /* Avoid unused parameter warning */ \ + (void)state_seeded; \ + } + +thashx4_variant(1, 1) +thashx4_variant(2, 2) +thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN) +thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thashx4.h new file mode 100644 index 00000000..58a5b981 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utils.c new file mode 100644 index 00000000..479907c2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utils.h new file mode 100644 index 00000000..52afc752 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utilsx4.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utilsx4.c new file mode 100644 index 00000000..5e4fb606 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utilsx4.h new file mode 100644 index 00000000..ce994c7c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/wots.c new file mode 100644 index 00000000..7d1b6b95 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */
+/*
+ * 'pk' receives WOTS_LEN values of N bytes each. 'addr' is only read here:
+ * it is copied into four working addresses, one per parallel lane.
+ */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed,
+        const unsigned char *pub_seed, uint32_t addr[8],
+        const hash_state *state_seeded) {
+    uint32_t i;
+    unsigned int j;
+
+    uint32_t addrx4[4 * 8];
+    unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N];
+
+    /* Start every lane from the caller's address. */
+    for (j = 0; j < 4; j++) {
+        memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8);
+    }
+
+    /* The last iteration typically does not have complete set of 4 chains,
+       but because we use pkbuf, this is not an issue -- we still do as many
+       in parallel as possible. */
+    /* The bound is WOTS_LEN rounded up to a multiple of 4. */
+    for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) {
+        for (j = 0; j < 4; j++) {
+            PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_chain_addr(addrx4 + j * 8, i + j);
+        }
+        /* Derive four chain start values, then walk each chain for W - 1 steps. */
+        wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded);
+        gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded);
+        /* Copy out only the lanes that correspond to real chains. */
+        for (j = 0; j < 4; j++) {
+            if (i + j < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN) {
+                memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N);
+            }
+        }
+    }
+
+    // No-op cast: state_seeded is already used by the calls above.
+    (void)state_seeded;
+}
+
+/**
+ * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */
+/*
+ * 'sig' receives WOTS_LEN values of N bytes each. 'addr' is passed to
+ * set_chain_addr for every chain, so the caller's address is modified.
+ */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg,
+        const unsigned char *sk_seed, const unsigned char *pub_seed,
+        uint32_t addr[8], const hash_state *state_seeded) {
+    unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN];
+    uint32_t i;
+
+    chain_lengths(lengths, msg);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_chain_addr(addr, i);
+        /* Chain start value goes straight into the signature slot ... */
+        wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sk_seed, addr, state_seeded);
+        /* ... and is then advanced in place by lengths[i] steps from position 0. */
+        gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded);
+    }
+
+    // No-op cast: state_seeded is already used by the calls above.
+    (void)state_seeded;
+}
+
+/**
+ * Takes a WOTS signature and an n-byte message, computes a WOTS public key.
+ *
+ * Writes the computed public key to 'pk'.
+ *
+ * 'pk' receives WOTS_LEN values of N bytes each. 'addr' is passed to
+ * set_chain_addr for every chain, so the caller's address is modified.
+ */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk,
+        const unsigned char *sig, const unsigned char *msg,
+        const unsigned char *pub_seed, uint32_t addr[8],
+        const hash_state *state_seeded) {
+    unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN];
+    uint32_t i;
+
+    chain_lengths(lengths, msg);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_set_chain_addr(addr, i);
+        /* Finish the chain: start at position lengths[i] and take the
+           remaining W - 1 - lengths[i] steps. */
+        gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_N,
+                  lengths[i], PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr,
+                  state_seeded);
+    }
+
+    // No-op cast: state_seeded is already used by the calls above.
+    (void)state_seeded;
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/wots.h
new file mode 100644
index 00000000..69f6c9db
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/avx2/wots.h
@@ -0,0 +1,41 @@
+#ifndef 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes a n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..1d3039dd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256f-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h
+
+$(LIBRARY): $(OBJECTS)
+    LIB.EXE /NOLOGO /WX /OUT:$@ $**
+
+clean:
+    -DEL $(OBJECTS)
+    -DEL $(LIBRARY)
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/address.c
new file mode 100644
index 00000000..9153447f
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/address.c
@@ -0,0 +1,78 @@
+#include
+
+#include "address.h"
+#include "params.h"
+#include "utils.h"
+
+/* Serializes the 8 address words into 'bytes', 4 bytes per word
+   (32 bytes in total), using ull_to_bytes for each word. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_addr_to_bytes(
+    unsigned char *bytes, const uint32_t addr[8]) {
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ull_to_bytes(
+            bytes + i * 4, 4, addr[i]);
+    }
+}
+
+/* Stores the layer in word 0. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_layer_addr(
+    uint32_t addr[8], uint32_t layer) {
+    addr[0] = layer;
+}
+
+/* Stores the 64-bit tree index in words 2 (high half) and 3 (low half);
+   word 1 is cleared. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_addr(
+    uint32_t addr[8], uint64_t tree) {
+    addr[1] = 0;
+    addr[2] = (uint32_t) (tree >> 32);
+    addr[3] = (uint32_t) tree;
+}
+
+/* Stores the address type in word 4. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type(
+    uint32_t addr[8], uint32_t type) {
+    addr[4] = type;
+}
+
+/* Copies words 0..3 (layer and tree) from 'in' to 'out'; words 4..7 of
+   'out' are left as they were. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_subtree_addr(
+    uint32_t out[8], const uint32_t in[8]) {
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+}
+
+/* These functions are used for OTS addresses. 
*/
+
+/* Stores the key pair index in word 5. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr(
+    uint32_t addr[8], uint32_t keypair) {
+    addr[5] = keypair;
+}
+
+/* Copies words 0..3 and word 5 from 'in' to 'out'. Word 4 (the type) and
+   words 6..7 of 'out' are left as they were. */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr(
+    uint32_t out[8], const uint32_t in[8]) {
+    out[0] = in[0];
+    out[1] = in[1];
+    out[2] = in[2];
+    out[3] = in[3];
+    out[5] = in[5];
+}
+
+/* Stores the chain index in word 6 (the same word set_tree_height writes). */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_chain_addr(
+    uint32_t addr[8], uint32_t chain) {
+    addr[6] = chain;
+}
+
+/* Stores the hash index in word 7 (the same word set_tree_index writes). */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_hash_addr(
+    uint32_t addr[8], uint32_t hash) {
+    addr[7] = hash;
+}
+
+/* These functions are used for all hash tree addresses (including FORS). */
+
+/* Stores the tree height in word 6 (the same word set_chain_addr writes). */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height(
+    uint32_t addr[8], uint32_t tree_height) {
+    addr[6] = tree_height;
+}
+
+/* Stores the tree index in word 7 (the same word set_hash_addr writes). */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index(
+    uint32_t addr[8], uint32_t tree_index) {
+    addr[7] = tree_index;
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/address.h
new file mode 100644
index 00000000..b665dd66
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/address.h
@@ -0,0 +1,50 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDRESS_H
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDRESS_H
+
+#include
+
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS 0
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4
+
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_addr_to_bytes(
+    unsigned char *bytes, const uint32_t addr[8]);
+
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_layer_addr(
+    uint32_t addr[8], uint32_t layer);
+
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_addr(
+    uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/api.h new file mode 100644 index 00000000..fa6dac07 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_BYTES 49216 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */
+int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_open(
+    uint8_t *m, size_t *mlen,
+    const uint8_t *sm, size_t smlen, const uint8_t *pk);
+
+#endif
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/fors.c
new file mode 100644
index 00000000..0dd068c0
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/fors.c
@@ -0,0 +1,161 @@
+#include
+#include
+#include
+
+#include "address.h"
+#include "fors.h"
+#include "hash.h"
+#include "hash_state.h"
+#include "thash.h"
+#include "utils.h"
+
+/* Derives the FORS secret value for the leaf named by fors_leaf_addr:
+   a direct call to prf_addr keyed with sk_seed. */
+static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed,
+                        uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) {
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_prf_addr(
+        sk, sk_seed, fors_leaf_addr, hash_state_seeded);
+}
+
+/* Hashes a FORS secret value into its leaf node with thash_1 under
+   pub_seed and the leaf address. */
+static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,
+                            const unsigned char *pub_seed,
+                            uint32_t fors_leaf_addr[8],
+                            const hash_state *hash_state_seeded) {
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_1(
+        leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded);
+}
+
+/* Computes the leaf at tree index addr_idx. A fresh local address is built
+   from fors_tree_addr, so the caller's address is not modified. This
+   function is passed as a callback to treehash_FORS_HEIGHT by fors_sign. */
+static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed,
+                          const unsigned char *pub_seed,
+                          uint32_t addr_idx, const uint32_t fors_tree_addr[8],
+                          const hash_state *hash_state_seeded) {
+    uint32_t fors_leaf_addr[8] = {0};
+
+    /* Only copy the parts that must be kept in fors_leaf_addr. */
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr(
+        fors_leaf_addr, fors_tree_addr);
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type(
+        fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE);
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index(
+        fors_leaf_addr, addr_idx);
+
+    /* 'leaf' first holds the secret value, then is overwritten with its hash. */
+    fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded);
+    fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded);
+}
+
+/**
+ * Interprets m as PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES bits.
+ * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES integers.
+ *
+ * Bits are taken from each byte starting at its least significant bit, and
+ * the j-th bit read for an index becomes bit j of that index.
+ */
+static void message_to_indices(uint32_t *indices, const unsigned char *m) {
+    unsigned int i, j;
+    unsigned int offset = 0; /* running bit position within m */
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES; i++) {
+        indices[i] = 0;
+        for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT; j++) {
+            /* offset >> 3 selects the byte, offset & 0x7 the bit inside it. */
+            indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;
+            offset++;
+        }
+    }
+}
+
+/**
+ * Signs a message m, deriving the secret key from sk_seed and the FTS address.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES bits. 
+ */
+/*
+ * For each of the FORS_TREES trees, 'sig' receives one N-byte secret value
+ * followed by FORS_HEIGHT * N bytes of authentication path. 'pk' receives
+ * the hash over all tree roots. fors_addr is only read.
+ */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_fors_sign(
+    unsigned char *sig, unsigned char *pk,
+    const unsigned char *m,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    const uint32_t fors_addr[8], const hash_state *hash_state_seeded) {
+    uint32_t indices[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES];
+    unsigned char roots[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N];
+    uint32_t fors_tree_addr[8] = {0};
+    uint32_t fors_pk_addr[8] = {0};
+    uint32_t idx_offset;
+    unsigned int i;
+
+    /* Two working addresses: one for tree nodes, one for the final key hash. */
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr(
+        fors_tree_addr, fors_addr);
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr(
+        fors_pk_addr, fors_addr);
+
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type(
+        fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE);
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type(
+        fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSPK);
+
+    message_to_indices(indices, m);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES; i++) {
+        /* Tree i owns the index range starting at i * 2^FORS_HEIGHT. */
+        idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT);
+
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height(
+            fors_tree_addr, 0);
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index(
+            fors_tree_addr, indices[i] + idx_offset);
+
+        /* Include the secret key part that produces the selected leaf node. */
+        fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N;
+
+        /* Compute the authentication path for this leaf node. */
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_FORS_HEIGHT(
+            roots + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, sig, sk_seed, pub_seed,
+            indices[i], idx_offset, fors_gen_leaf, fors_tree_addr,
+            hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT;
+    }
+
+    /* Hash horizontally across all tree roots to derive the public key. */
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_FORS_TREES(
+        pk, roots, pub_seed, fors_pk_addr, hash_state_seeded);
+}
+
+/**
+ * Derives the FORS public key from a signature.
+ * This can be used for verification by comparing to a known public key, or to
+ * subsequently verify a signature on the derived public key. The latter is the
+ * typical use-case when used as an FTS below an OTS in a hypertree.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES bits.
+ *
+ * 'sig' is read in the layout fors_sign writes: per tree, one N-byte secret
+ * value followed by FORS_HEIGHT * N bytes of authentication path.
+ */
+void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_fors_pk_from_sig(
+    unsigned char *pk,
+    const unsigned char *sig, const unsigned char *m,
+    const unsigned char *pub_seed, const uint32_t fors_addr[8],
+    const hash_state *hash_state_seeded) {
+    uint32_t indices[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES];
+    unsigned char roots[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N];
+    unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N];
+    uint32_t fors_tree_addr[8] = {0};
+    uint32_t fors_pk_addr[8] = {0};
+    uint32_t idx_offset;
+    unsigned int i;
+
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr);
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr);
+
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE);
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSPK);
+
+    message_to_indices(indices, m);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES; i++) {
+        /* Tree i owns the index range starting at i * 2^FORS_HEIGHT. */
+        idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT);
+
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0);
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset);
+
+        /* Derive the leaf from the included secret key part. */
+        fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N;
+
+        /* Derive the corresponding root node of this tree. */
+        PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig,
+                PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded);
+        sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT;
+    }
+
+    /* Hash horizontally across all tree roots to derive the public key. */
+    PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded);
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/fors.h
new file mode 100644
index 00000000..2cb13e99
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/fors.h
@@ -0,0 +1,32 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_H
+#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_H
+
+#include
+
+#include "hash_state.h"
+#include "params.h"
+
+/**
+ * Signs a message m, deriving the secret key from sk_seed and the FTS address.
+ * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash.h new file mode 100644 index 00000000..ab6f5ac2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash_shake256.c new file mode 100644 index 00000000..dfc285bf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/params.h new file mode 100644 index 00000000..ad979313 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FULL_HEIGHT 68 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D 17 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT 10 +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES 30 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/sign.c new file mode 100644 index 00000000..f459a3a2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/thash.h new file mode 100644 index 00000000..59aeb87c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/thash_shake256_simple.c 
b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/thash_shake256_simple.c new file mode 100644 index 00000000..7e0aec01 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/utils.c new file mode 100644 index 00000000..1bdf401f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. 
+ * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_FORS_HEIGHT, 
gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/utils.h new file mode 100644 index 00000000..5f5fe270 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. 
+ * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git 
a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/wots.c new file mode 100644 index 00000000..dec1ad17 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. 
*/ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/wots.h new file mode 100644 index 00000000..2074f0cf --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256f-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_H +#define 
PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256FSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/META.yml b/crypto_sign/sphincs/sphincs-shake256-256s-robust/META.yml new file mode 100644 index 00000000..75f4a9e0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 29792 +testvectors-sha256: eea62308d71394a888e05128f078c4663dc83e128c34e0300bb16cb839d8698b +nistkat-sha256: 09004dba03b2a190a327b5404a4d75c663f025703253b78946d0a99ca1492d6f +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M. 
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..f8d7b1db --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256s-robust_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj hash_shake256x4.obj thash_shake256_robustx4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/address.c new file mode 100644 index 00000000..b9f84148 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/address.h new file mode 100644 index 00000000..b0a8cc07 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h new file mode 100644 index 00000000..6894901a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fips202x4.c new file mode 100644 index 00000000..06847569 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_xor_si256(s[i], s[i]); + } + + /* absorb 4 message of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i 
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_xor_si256(s[i], s[i]); + } + + /* absorb 4 message of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fips202x4.h new file mode 100644 index 00000000..daa02fe2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + 
/*
 * Derives four FORS secret-key values in one call.
 *
 * Pure forwarding wrapper: every argument is handed unchanged to the 4-way
 * PRF. fors_leaf_addrx4 carries four addresses of 8 words each, one per
 * output sk0..sk3.
 */
static void fors_gen_skx4(unsigned char *sk0,
                          unsigned char *sk1,
                          unsigned char *sk2,
                          unsigned char *sk3, const unsigned char *sk_seed,
                          uint32_t fors_leaf_addrx4[4 * 8],
                          const hash_state *state_seeded) {
    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded);
}

/*
 * Turns one FORS secret-key value into its leaf node.
 *
 * Pure forwarding wrapper around the single-block tweakable hash thash_1,
 * keyed by pub_seed and the given leaf address.
 */
static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk,
                            const unsigned char *pub_seed,
                            uint32_t fors_leaf_addr[8], const hash_state *state_seeded) {
    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded);
}

/*
 * 4-way counterpart of fors_sk_to_leaf: maps sk0..sk3 to leaf0..leaf3.
 *
 * Pure forwarding wrapper around thashx4_1; fors_leaf_addrx4 carries one
 * 8-word address per lane. fors_gen_leafx4 calls this with each leafN
 * aliasing the matching skN.
 */
static void fors_sk_to_leafx4(unsigned char *leaf0,
                              unsigned char *leaf1,
                              unsigned char *leaf2,
                              unsigned char *leaf3,
                              const unsigned char *sk0,
                              const unsigned char *sk1,
                              const unsigned char *sk2,
                              const unsigned char *sk3,
                              const unsigned char *pub_seed,
                              uint32_t fors_leaf_addrx4[4 * 8],
                              const hash_state *state_seeded) {
    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3,
            sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded);
}
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES integers. 
/**
 * Interprets m as PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT-bit unsigned integers.
 * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES bits.
 * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES integers.
 *
 * Bits are consumed in a single running stream: least-significant bit of
 * m[0] first, and the j-th bit read for an index becomes bit j of that index.
 */
static void message_to_indices(uint32_t *indices, const unsigned char *m) {
    unsigned int i, j;
    unsigned int offset = 0; /* running bit position within m */

    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES; i++) {
        indices[i] = 0;
        for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT; j++) {
            /* byte offset >> 3, bit offset & 7 within that byte */
            indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j;
            offset++;
        }
    }
}

/**
 * Signs a message m, deriving the secret key from sk_seed and the FTS address.
 * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES bits.
 *
 * Trees are processed four at a time. For every tree the signature receives
 * N bytes of secret-key value followed by FORS_HEIGHT * N bytes of
 * authentication path; pk receives the hash over all tree roots.
 */
void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_fors_sign(unsigned char *sig, unsigned char *pk,
        const unsigned char *m,
        const unsigned char *sk_seed, const unsigned char *pub_seed,
        const uint32_t fors_addr[8], const hash_state *state_seeded) {
    /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */
    uint32_t indices[(PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES + 3) & ~3] = {0};
    unsigned char roots[((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N];
    /* Sign to a buffer, since we may not have a nice multiple of 4 and would
       otherwise overrun the signature. */
    /* Layout as used below: four N-byte secret-key values first, then four
       authentication paths of FORS_HEIGHT * N bytes each. */
    unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT)];
    uint32_t fors_tree_addrx4[4 * 8] = {0};
    uint32_t fors_pk_addr[8] = {0};
    uint32_t idx_offset[4] = {0};
    unsigned int i, j;

    /* One FORS-tree address per lane, all sharing fors_addr's key pair. */
    for (j = 0; j < 4; j++) {
        PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr);
        PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE);
    }

    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr);
    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSPK);

    message_to_indices(indices, m);

    for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES + 3) & ~0x3); i += 4) {
        /* Lanes past the last real tree are skipped here, so they keep the
           address and offset left over from the previous group. */
        for (j = 0; j < 4; j++) {
            if (i + j < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES) {
                /* First leaf of tree i + j in the global leaf numbering. */
                idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT);

                PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0);
                PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(fors_tree_addrx4 + j * 8,
                        indices[i + j] + idx_offset[j]);
            }
        }

        /* Include the secret key part that produces the selected leaf nodes. */
        fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N,
                      sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N,
                      sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N,
                      sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N,
                      sk_seed, fors_tree_addrx4, state_seeded);

        /* NOTE(review): treehashx4 is defined elsewhere; presumably it fills
           four roots at roots + i * N and the four authentication paths
           behind the secret-key values in sigbufx4 - confirm in utilsx4. */
        PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sk_seed, pub_seed,
                &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4,
                state_seeded);

        /* Copy only the lanes that belong to real trees into the signature:
           secret-key value first, then that lane's authentication path. */
        for (j = 0; j < 4; j++) {
            if (i + j < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES) {
                memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N);
                memcpy(sig + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N,
                       sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT,
                       PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT);
                sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT);
            }
        }
    }

    /* Hash horizontally across all tree roots to derive the public key. */
    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded);
}
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fors.h new file mode 100644 index 00000000..e2cefc82 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hash.h new file mode 100644 index 00000000..857218a1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hash_shake256.c new file mode 100644 index 00000000..9fbd642d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
/**
 * Computes the message-dependent randomness R, using a secret seed and an
 * optional randomization value as well as the message.
 *
 * R receives N bytes of SHAKE256 output over sk_prf (N bytes), optrand
 * (N bytes) and m (mlen bytes), absorbed in that order.
 * hash_state_seeded is unused for SHAKE256.
 */
void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_gen_message_random(
    unsigned char *R,
    const unsigned char *sk_prf, const unsigned char *optrand,
    const unsigned char *m, size_t mlen,
    const hash_state *hash_state_seeded) {
    shake256incctx state;

    shake256_inc_init(&state);
    shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N);
    shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N);
    shake256_inc_absorb(&state, m, mlen);
    shake256_inc_finalize(&state);
    shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, &state);
    /* The incremental context is released on every path through here. */
    shake256_inc_ctx_release(&state);

    (void)hash_state_seeded; /* Prevent unused parameter warning. */
}

/**
 * Computes the message hash using R, the public key, and the message.
 * Outputs the message digest and the index of the leaf. The index is split in
 * the tree index and the leaf index, for convenient copying to an address.
 *
 * SHAKE256 over R (N bytes), pk (PK_BYTES) and m (mlen bytes) is squeezed
 * into DGST_BYTES, which are consumed front to back as:
 *   FORS_MSG_BYTES -> digest
 *   TREE_BYTES     -> *tree, masked down to TREE_BITS
 *   LEAF_BYTES     -> *leaf_idx, masked down to LEAF_BITS
 */
void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_hash_message(
    unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx,
    const unsigned char *R, const unsigned char *pk,
    const unsigned char *m, size_t mlen,
    const hash_state *hash_state_seeded) {
    /* Sizes of the tree-index and leaf-index fields inside the digest buffer;
       the *_BYTES values are the *_BITS values rounded up to whole bytes. */
#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D - 1))
#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BITS + 7) / 8)
#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT
#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_LEAF_BITS + 7) / 8)
#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_LEAF_BYTES)

    unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_DGST_BYTES];
    unsigned char *bufp = buf; /* read cursor into buf */
    shake256incctx state;

    shake256_inc_init(&state);
    shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N);
    shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_PK_BYTES);
    shake256_inc_absorb(&state, m, mlen);
    shake256_inc_finalize(&state);
    shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_DGST_BYTES, &state);
    shake256_inc_ctx_release(&state);

    memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_MSG_BYTES);
    bufp += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_MSG_BYTES;

    /* NOTE(review): bytes_to_ull is defined elsewhere; its byte order is not
       visible here - confirm in utils. */
    *tree = PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_bytes_to_ull(
                bufp, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BYTES);
    /* Keep only the low TREE_BITS bits; the shift count must stay below 64,
       i.e. TREE_BITS must be non-zero. */
    *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BITS);
    bufp += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_BYTES;

    *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_bytes_to_ull(
                    bufp, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_LEAF_BYTES);
    /* Keep only the low LEAF_BITS bits. */
    *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_LEAF_BITS);

    (void)hash_state_seeded; /* Prevent unused parameter warning. */
}

/*
 * 4-way parallel version of prf_addr; takes 4x as much input and output
 *
 * All four lanes use the same key; lane j uses the 8-word address at
 * addrx4 + j * 8 and writes N bytes to out<j>.
 */
void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addrx4(unsigned char *out0,
        unsigned char *out1,
        unsigned char *out2,
        unsigned char *out3,
        const unsigned char *key,
        const uint32_t addrx4[4 * 8],
        const hash_state *state_seeded) {
    /* Four back-to-back inputs of N key bytes followed by ADDR_BYTES. */
    unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES)];
    unsigned int j;

    for (j = 0; j < 4; j++) {
        memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N);
        PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES), addrx4 + j * 8);
    }

    PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_shake256x4(out0,
            out1,
            out2,
            out3, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N,
            bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES),
            bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES),
            bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES),
            bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES);

    /* Avoid unused parameter warning */
    (void)state_seeded;
}
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hashx4.h new file mode 100644 index 00000000..28df2e85 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/params.h new file mode 100644 index 00000000..4ac29906 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES 22 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N / PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/sign.c new file mode 100644 index 00000000..05c99703 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee 16-byte alignment of sk: the __m128 array member (not a pointer) imposes it + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee 16-byte alignment of sig; the result is copied back to the caller's buffer at the end + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES / 16]; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + + /* Update the indices for the next layer. 
+ */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee 16-byte alignment of pk: the __m128 array member (not a pointer) imposes it + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee 16-byte alignment of pk: the __m128 array member (not a pointer) imposes it + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash.h new file mode 100644 index 00000000..aad7074d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash_shake256_robust.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash_shake256_robust.c new file mode 100644 index 00000000..3961b5b5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash_shake256_robustx4.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash_shake256_robustx4.c new file mode 100644 index 00000000..1d0740c2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thash_shake256_robustx4.c @@ -0,0 +1,66 @@ +#include +#include + +#include "thashx4.h" +#include "address.h" +#include "params.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thash_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thashx4_##name(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) \ + { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char bitmask0[(inblocks) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char bitmask1[(inblocks) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char bitmask2[(inblocks) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned char bitmask3[(inblocks) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned int i; \ 
+ \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(buf0 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, addrx4 + 0*8); \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, addrx4 + 1*8); \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, addrx4 + 2*8); \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, addrx4 + 3*8); \ + \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_shake256x4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES); \ + \ + for (i = 0; i < (inblocks) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; i++) { \ + buf0[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \ + buf1[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \ + buf2[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \ + buf3[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \ + } \ + \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_shake256x4( \ + out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + buf0, buf1, buf2, buf3, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + \ + /* avoid unused parameter 
warning */ \ + (void)state_seeded; \ + } + + +thash_variant(1, 1) +thash_variant(2, 2) +thash_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN) +thash_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thashx4.h new file mode 100644 index 00000000..1aecff8a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utils.c new file mode 100644 index 00000000..b269a2e6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. 
*/ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. 
*/ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + 
unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utils.h new file mode 100644 index 00000000..aa1092a4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include <stddef.h> +#include <stdint.h> + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utilsx4.c 
b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utilsx4.c new file mode 100644 index 00000000..47f2d041 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include <string.h> + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + * Handles four trees per call and walks all 2^tree_height leaves; the leaf + * count is formed with an unsigned shift. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < ((uint32_t)1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utilsx4.h new file mode 100644 index 00000000..d53c5939 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include <stdint.h> + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/wots.c new file mode 100644 index 00000000..00970a0a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/wots.c @@ -0,0 +1,240 @@ +#include <stdint.h> +#include <string.h> + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + /* The outer % 8 keeps the shift at 0 (not 8) when LEN2 * LOGW already fills whole bytes. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN) { + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N); + } + } + } + + // state_seeded is used by the calls above, so this cast is a redundant no-op. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_chain_addr(addr, i); + /* Write the chain's secret start value into sig, then advance it in place by lengths[i] steps. */ + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // state_seeded is used by the calls above, so this cast is a redundant no-op. + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_set_chain_addr(addr, i); + /* Continue each chain from position lengths[i] for the remaining W - 1 - lengths[i] steps. */ + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // state_seeded is used by the call above, so this cast is a redundant no-op. + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/wots.h new file mode 100644 index 00000000..b2e4d8d5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include <stdint.h> + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..d82657dc --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256s-robust_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_robust.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/address.c new file mode 100644 index 00000000..87b3586c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/address.c @@ -0,0 +1,78 @@ +#include <stdint.h> + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/address.h new file mode 100644 index 00000000..349c31a5 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDRESS_H + +#include <stdint.h> + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/api.h new file mode 100644 index 00000000..5bd36e84 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/fors.c new file mode 100644 index 00000000..7419a31c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/fors.h new file mode 100644 index 00000000..b685b1e2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash.h new file mode 100644 index 00000000..cf08f308 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash_shake256.c new file mode 100644 index 00000000..ebf85a81 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/params.h new file mode 100644 index 00000000..d9beed9b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES 22 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N / PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/sign.c new file mode 100644 index 00000000..0492fd49 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SEEDBYTES. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/thash.h new file mode 100644 index 00000000..b68830c1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/thash_shake256_robust.c 
b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/thash_shake256_robust.c new file mode 100644 index 00000000..bdc8d5ad --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/thash_shake256_robust.c @@ -0,0 +1,81 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + unsigned char *bitmask = buf + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES; + unsigned int i; + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, addr); + + shake256(bitmask, inblocks * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES); + + for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; i++) { + buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES + i] = in[i] ^ bitmask[i]; + } + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + 
PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/utils.c new file mode 100644 index 00000000..cf6a9842 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/utils.h new file mode 100644 index 00000000..c5f52bef --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/wots.c new file mode 100644 index 00000000..03d8e62f --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? 
+
+/**
+ * Computes the starting value for a chain, i.e. the secret key, and writes it to 'sk'.
+ * Expects the address to be complete up to the chain address; the hash field of wots_addr is reset to 0 here, so the caller's array is modified.
+ */
+static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed,
+                        uint32_t wots_addr[8],
+                        const hash_state *hash_state_seeded) {
+    /* Make sure that the hash address is actually zeroed. */
+    PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_hash_addr(wots_addr, 0);
+
+    /* Generate sk element. */
+    PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded);
+}
+
+/**
+ * Computes the chaining function: applies thash_1 once for each position i in [start, start + steps) that is also below WOTS_W.
+ * out and in have to be n-byte arrays; they may be the same buffer (wots_gen_pk and wots_sign chain in place).
+ *
+ * Interprets in as start-th value of the chain.
+ * addr has to contain the address of the chain; its hash field is overwritten with i before every step.
+ */
+static void gen_chain(unsigned char *out, const unsigned char *in,
+                      unsigned int start, unsigned int steps,
+                      const unsigned char *pub_seed, uint32_t addr[8],
+                      const hash_state *hash_state_seeded) {
+    uint32_t i;
+
+    /* Initialize out with the value at position 'start'; memmove, because out == in is a supported call pattern and memcpy on overlapping objects is undefined. */
+    memmove(out, in, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N);
+
+    /* Iterate 'steps' calls to the hash function. */
+    for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_W; i++) {
+        PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_hash_addr(addr, i);
+        PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_thash_1(
+            out, out, pub_seed, addr, hash_state_seeded);
+    }
+}
+
+/**
+ * base_w algorithm as described in draft.
+ * Interprets an array of bytes as integers in base w.
+ * This only works when log_w is a divisor of 8.
+ */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. 
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys)
+ * and the address of this WOTS key pair.
+ *
+ * Writes the computed public key to 'pk'.
+ */
+void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_gen_pk(
+    unsigned char *pk, const unsigned char *sk_seed,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded) {
+    uint32_t i; /* chain index; element i of the public key lives at pk + i * n */
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_chain_addr(addr, i); /* addr is modified for every chain */
+        wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); /* the secret chain start is written straight into pk ... */
+        gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N,
+                  0, PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); /* ... and advanced in place (out == in) from position 0 by w - 1 steps */
+    }
+}
+
+/**
+ * Takes a n-byte message and the 32-byte sk_seed to compute a signature 'sig': WOTS_LEN elements of n bytes each, element i being chain i advanced by lengths[i] steps. The chain and hash fields of addr are overwritten.
+ */
+void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_sign(
+    unsigned char *sig, const unsigned char *msg,
+    const unsigned char *sk_seed, const unsigned char *pub_seed,
+    uint32_t addr[8], const hash_state *hash_state_seeded) {
+    unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN]; /* number of chain steps per element: message digits followed by checksum digits */
+    uint32_t i;
+
+    chain_lengths(lengths, msg);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_chain_addr(addr, i);
+        wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, sk_seed, addr, hash_state_seeded); /* secret chain start is generated directly into the signature buffer */
+        gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); /* in place: out and in are the same element */
+    }
+}
+
+/**
+ * Takes a WOTS signature and an n-byte message, computes a WOTS public key.
+ *
+ * Writes the computed public key to 'pk'.
+ */
+void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_pk_from_sig(
+    unsigned char *pk,
+    const unsigned char *sig, const unsigned char *msg,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded) { /* recomputes each chain end from the signature element; nothing is compared here, the caller checks the result */
+    unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN]; /* chain position of each signature element, derived from msg */
+    uint32_t i;
+
+    chain_lengths(lengths, msg);
+
+    for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_LEN; i++) {
+        PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_set_chain_addr(addr, i); /* addr is modified for every chain */
+        gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_N,
+                  lengths[i], PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, /* start at position lengths[i] and run the remaining steps up to w - 1 */
+                  hash_state_seeded);
+    }
+}
diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/wots.h
new file mode 100644
index 00000000..83328800
--- /dev/null
+++ b/crypto_sign/sphincs/sphincs-shake256-256s-robust/clean/wots.h
@@ -0,0 +1,41 @@
+#ifndef PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_H
+#define PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_WOTS_H
+
+#include "hash_state.h"
+#include "params.h"
+#include
+
+/**
+ * WOTS key generation. Takes a 32 byte seed for the private key, expands it to
+ * a full WOTS private key and computes the corresponding public key.
+ * It requires the seed pub_seed (used to generate bitmasks and hash keys)
+ * and the address of this WOTS key pair.
+ *
+ * Writes the computed public key to 'pk'.
+ */
+void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_gen_pk(
+    unsigned char *pk, const unsigned char *sk_seed,
+    const unsigned char *pub_seed, uint32_t addr[8],
+    const hash_state *hash_state_seeded);
+
+/**
+ * Takes a n-byte message and the 32-byte seed for the private key to compute a
+ * signature that is placed at 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SROBUST_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/META.yml b/crypto_sign/sphincs/sphincs-shake256-256s-simple/META.yml new file mode 100644 index 00000000..6dd43370 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/META.yml @@ -0,0 +1,35 @@ +name: SPHINCS+ +type: signature +claimed-nist-level: 5 +length-public-key: 64 +length-secret-key: 128 +length-signature: 29792 +testvectors-sha256: fc518be7778d0363f17a30c50efbe28841f5a795e7375e94d206f115967f30df +nistkat-sha256: f704deaf990987c306082bb28258cfb8c6f03b49940c06df582ef3fb86958e8a +principal-submitters: + - Andreas Hülsing +auxiliary-submitters: + - Jean-Philippe Aumasson + - Daniel J. Bernstein + - Christoph Dobraunig + - Maria Eichlseder + - Scott Fluhrer + - Stefan-Lukas Gazdag + - Panos Kampanakis + - Stefan Kölbl + - Tanja Lange + - Martin M.
Lauridsen + - Florian Mendel + - Ruben Niederhagen + - Christian Rechberger + - Joost Rijneveld + - Peter Schwabe +implementations: + - name: clean + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + - name: avx2 + version: https://github.com/sphincs/sphincsplus/commit/77755c94d0bc744478044d6efbb888dc13156441 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..1aeed608 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,27 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256s-simple_avx2.lib +OBJECTS=address.obj wots.obj utils.obj utilsx4.obj fips202x4.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj hash_shake256x4.obj thash_shake256_simplex4.obj + +KECCAK4XDIR=..\..\..\common\keccak4x +KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.obj +KECCAK4X=$(KECCAK4XDIR)\$(KECCAK4XOBJ) + +CFLAGS=/nologo /arch:AVX2 /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) $(KECCAK4X) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +$(KECCAK4X): + cd $(KECCAK4XDIR) && $(MAKE) /f Makefile.Microsoft_nmake $(KECCAK4XOBJ) + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) + -DEL $(KECCAK4X) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/address.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/address.c new file mode 100644 index 00000000..74b2ad54 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; /* tree is 64 bits wide: high word goes to addr[2], low word to addr[3]; addr[1] is cleared */ + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for WOTS and FORS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/address.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/address.h new file mode 100644 index 00000000..30057aab --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_addr( + uint32_t addr[8], uint64_t tree); + +void 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h new file mode 100644 index 00000000..f478c5ab --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_API_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_secretkeybytes(void); + +/* + * Returns the length of a public key, in 
bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fips202x4.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fips202x4.c new file mode 100644 index 00000000..18061a79 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fips202x4.c @@ -0,0 +1,205 @@ +#include +#include +#include + +#include "fips202.h" +#include "fips202x4.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset)))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = (uint8_t)u; + u >>= 8; + } +} + +/* Use implementation from the Keccak Code Package */ +extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); +#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds + +static void keccak_absorb4x(__m256i *s, + unsigned int r, + const unsigned char *m0, + const unsigned char *m1, + const unsigned char *m2, + const unsigned char *m3, + size_t mlen, + unsigned char p) { + unsigned char t0[200] = {0}; + unsigned char t1[200] = {0}; + unsigned char t2[200] = {0}; + unsigned char t3[200] = {0}; + + unsigned long long *ss = (unsigned long long *)s; + + + while (mlen >= r) { + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(m0 + 8 * i); + ss[4 * i + 1] ^= load64(m1 + 8 * i); + ss[4 * i + 2] ^= load64(m2 + 8 * i); + ss[4 * i + 3] ^= load64(m3 + 8 * i); + } + + KeccakF1600_StatePermute4x(s); + mlen -= r; + m0 += r; + m1 += r; + m2 += r; + m3 += r; + } + + memcpy(t0, m0, mlen); + memcpy(t1, m1, mlen); + memcpy(t2, m2, mlen); + memcpy(t3, m3, mlen); + + t0[mlen] = p; + t1[mlen] = p; + t2[mlen] = p; + t3[mlen] = p; 
+ + t0[r - 1] |= 128; + t1[r - 1] |= 128; + t2[r - 1] |= 128; + t3[r - 1] |= 128; + + for (size_t i = 0; i < r / 8; ++i) { + ss[4 * i + 0] ^= load64(t0 + 8 * i); + ss[4 * i + 1] ^= load64(t1 + 8 * i); + ss[4 * i + 2] ^= load64(t2 + 8 * i); + ss[4 * i + 3] ^= load64(t3 + 8 * i); + } +} + + +static void keccak_squeezeblocks4x(unsigned char *h0, + unsigned char *h1, + unsigned char *h2, + unsigned char *h3, + unsigned long long int nblocks, + __m256i *s, + unsigned int r) { + unsigned int i; + + unsigned long long *ss = (unsigned long long *)s; + + while (nblocks > 0) { + KeccakF1600_StatePermute4x(s); + for (i = 0; i < (r >> 3); i++) { + store64(h0 + 8 * i, ss[4 * i + 0]); + store64(h1 + 8 * i, ss[4 * i + 1]); + store64(h2 + 8 * i, ss[4 * i + 2]); + store64(h3 + 8 * i, ss[4 * i + 3]); + } + h0 += r; + h1 += r; + h2 += r; + h3 += r; + nblocks--; + } +} + + + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE128_RATE]; + unsigned char t1[SHAKE128_RATE]; + unsigned char t2[SHAKE128_RATE]; + unsigned char t3[SHAKE128_RATE]; + unsigned int i; + + /* zero state with setzero: s[] is still uninitialized here and must not be read */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + + out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE); + for (i 
= 0; i < outlen % SHAKE128_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} + + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen) { + __m256i s[25]; + unsigned char t0[SHAKE256_RATE]; + unsigned char t1[SHAKE256_RATE]; + unsigned char t2[SHAKE256_RATE]; + unsigned char t3[SHAKE256_RATE]; + unsigned int i; + + /* zero state with setzero: s[] is still uninitialized here and must not be read */ + for (i = 0; i < 25; i++) { + s[i] = _mm256_setzero_si256(); + } + + /* absorb 4 messages of identical length in parallel */ + keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F); + + /* Squeeze output */ + keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE); + + out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE; + + if (outlen % SHAKE256_RATE) { + keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE); + for (i = 0; i < outlen % SHAKE256_RATE; i++) { + out0[i] = t0[i]; + out1[i] = t1[i]; + out2[i] = t2[i]; + out3[i] = t3[i]; + } + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fips202x4.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fips202x4.h new file mode 100644 index 00000000..ebcd58d2 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fips202x4.h @@ -0,0 +1,27 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FIPS202X4_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FIPS202X4_H + +#include + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_shake128x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + 
unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, unsigned long long inlen); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_shake256x4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + unsigned long long outlen, + unsigned char *in0, + unsigned char *in1, + unsigned char *in2, + unsigned char *in3, + unsigned long long inlen); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fors.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fors.c new file mode 100644 index 00000000..a264e02c --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fors.c @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hashx4.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +static void fors_gen_skx4(unsigned char *sk0, + unsigned char *sk1, + unsigned char *sk2, + unsigned char *sk3, const unsigned char *sk_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addrx4(sk0, sk1, sk2, sk3, sk_seed, fors_leaf_addrx4, state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_1(leaf, sk, pub_seed, fors_leaf_addr, state_seeded); +} + +static void fors_sk_to_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk0, + const unsigned char *sk1, + const unsigned char *sk2, + const unsigned char *sk3, + const unsigned char *pub_seed, + uint32_t fors_leaf_addrx4[4 * 8], + const hash_state *state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thashx4_1(leaf0, leaf1, leaf2, leaf3, + sk0, sk1, sk2, sk3, pub_seed, fors_leaf_addrx4, state_seeded); +} + 
+static void fors_gen_leafx4(unsigned char *leaf0, + unsigned char *leaf1, + unsigned char *leaf2, + unsigned char *leaf3, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx0, + uint32_t addr_idx1, + uint32_t addr_idx2, + uint32_t addr_idx3, + const uint32_t fors_tree_addr[8], + const hash_state *state_seeded) { + uint32_t fors_leaf_addrx4[4 * 8] = {0}; + unsigned int j; + + /* Only copy the parts that must be kept in fors_leaf_addrx4. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr(fors_leaf_addrx4 + j * 8, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type(fors_leaf_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 0 * 8, addr_idx0); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 1 * 8, addr_idx1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 2 * 8, addr_idx2); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(fors_leaf_addrx4 + 3 * 8, addr_idx3); + + fors_gen_skx4(leaf0, leaf1, leaf2, leaf3, sk_seed, fors_leaf_addrx4, state_seeded); + fors_sk_to_leafx4(leaf0, leaf1, leaf2, leaf3, + leaf0, leaf1, leaf2, leaf3, pub_seed, fors_leaf_addrx4, state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES integers. 
+ */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_fors_sign(unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *state_seeded) { + /* Round up to multiple of 4 to prevent out-of-bounds for x4 parallelism */ + uint32_t indices[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES + 3) & ~3] = {0}; + unsigned char roots[((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES + 3) & ~3) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + /* Sign to a buffer, since we may not have a nice multiple of 4 and would + otherwise overrun the signature. 
*/ + unsigned char sigbufx4[4 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT)]; + uint32_t fors_tree_addrx4[4 * 8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset[4] = {0}; + unsigned int i, j; + + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addrx4 + j * 8, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type(fors_tree_addrx4 + j * 8, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + } + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES) { + idx_offset[j] = (i + j) * (1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height(fors_tree_addrx4 + j * 8, 0); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(fors_tree_addrx4 + j * 8, + indices[i + j] + idx_offset[j]); + } + } + + /* Include the secret key part that produces the selected leaf nodes. 
*/ + fors_gen_skx4(sigbufx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sigbufx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sigbufx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sigbufx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sk_seed, fors_tree_addrx4, state_seeded); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehashx4_FORS_HEIGHT(roots + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sk_seed, pub_seed, + &indices[i], idx_offset, fors_gen_leafx4, fors_tree_addrx4, + state_seeded); + + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES) { + memcpy(sig, sigbufx4 + j * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + memcpy(sig + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sigbufx4 + 4 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + j * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT, + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N * (1 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT); + } + } + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_fors_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, + const uint32_t fors_addr[8], + const hash_state *state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type(fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + + /* Derive the corresponding root node of this tree. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, leaf, indices[i], idx_offset, + sig, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT, pub_seed, fors_tree_addr, + state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fors.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fors.h new file mode 100644 index 00000000..a9685320 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash.h new file mode 100644 index 00000000..0b96519a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_shake256.c new file mode 100644 index 00000000..23f34d94 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" 
+#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. */ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_LEAF_BYTES) + + unsigned char 
buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_shake256x4.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_shake256x4.c new file mode 100644 index 00000000..a4c36572 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_shake256x4.c @@ -0,0 +1,38 @@ +#include +#include + +#include "address.h" +#include "fips202x4.h" +#include "hashx4.h" +#include "params.h" + +/* + * 4-way parallel version of prf_addr; takes 4x as much input and output + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + unsigned char bufx4[4 * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES)]; + unsigned int j; + + for (j = 0; j < 4; j++) { + memcpy(bufx4 + j * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES), key, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(bufx4 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + j * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES), addrx4 + j * 8); + } + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_shake256x4(out0, + out1, + out2, + out3, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + bufx4 + 0 * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 1 * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 2 * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES), + bufx4 + 3 * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES), PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES); + + /* Avoid 
unused parameter warning */ + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_state.h new file mode 100644 index 00000000..934fd3b3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hash_state.h @@ -0,0 +1,30 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_HASH_STATE_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +typedef int hash_state; + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hashx4.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hashx4.h new file mode 100644 index 00000000..f2bbfe30 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/hashx4.h @@ -0,0 +1,16 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_HASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_HASHX4_H + +#include + +#include "hash_state.h" + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addrx4(unsigned char *out0, + unsigned char *out1, + unsigned char *out2, + unsigned char *out3, + const unsigned char *key, + const uint32_t addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/params.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/params.h new file mode 100644 index 00000000..992d0431 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FULL_HEIGHT 64 +/* Number of subtree layers. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES 22 +/* Winternitz parameter. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N / PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/sign.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/sign.c new file mode 100644 index 00000000..d7b43ab0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/sign.c @@ -0,0 +1,409 @@ +#include +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given a seed of length PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES. + 
* Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D - 1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + + // guarantee alignment of pk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + + // guarantee alignment of sk + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES / 16]; + uint8_t seed[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES]; + } aligned_seed; + randombytes(aligned_seed.seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SEEDBYTES); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_seed_keypair( + aligned_pk.pk, aligned_sk.sk, aligned_seed.seed); + memcpy(pk, aligned_pk.pk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + memcpy(sk, aligned_sk.sk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + + return 0; +} + +/** + * Returns an array containing a detached signature. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + // guarantee 16-byte alignment of sk: the __m128 array member sets the alignment of the union + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES / 16]; + uint8_t sk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES]; + } aligned_sk; + memcpy(aligned_sk.sk, sk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_SECRETKEYBYTES); + sk = aligned_sk.sk; + + // guarantee 16-byte alignment of sig: the signature is built in this buffer and copied to the caller's sig at the end + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES / 16]; + uint8_t sig[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES]; + } aligned_sig; + uint8_t *orig_sig = sig; + sig = (uint8_t *)aligned_sig.sig; + + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT; + } + + memcpy(orig_sig, aligned_sig.sig, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES); + *siglen = PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES; + + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. Returns 0 if the signature is valid and -1 otherwise. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + // guarantee 16-byte alignment of pk: the __m128 array member sets the alignment of the union + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. */ + /* The additional PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT; + } + + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + + // guarantee 16-byte alignment of pk: the __m128 array member sets the alignment of the union + union { + __m128 _x[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES / 16]; + uint8_t pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES]; + } aligned_pk; + memcpy(aligned_pk.pk, pk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_CRYPTO_PUBLICKEYBYTES); + pk = aligned_pk.pk; + + + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES. */ + if (smlen < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. 
*/ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash.h new file mode 100644 index 00000000..c3782cb9 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash_shake256_simple.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash_shake256_simple.c new file mode 100644 index 00000000..632aa39e --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N bytes. 
+ */ +static void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, buf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash_shake256_simplex4.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash_shake256_simplex4.c new file mode 100644 index 00000000..6cc6705d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thash_shake256_simplex4.c @@ -0,0 +1,47 @@ +#include +#include + +#include "address.h" +#include "params.h" +#include "thashx4.h" + +#include "fips202x4.h" + +/** + * 4-way parallel version of thash; takes 4x as much input and output + */ +#define thashx4_variant(name, inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thashx4_##name( \ + unsigned char *out0, unsigned char *out1, unsigned char *out2, unsigned char *out3, \ + const unsigned char *in0, const unsigned char *in1, const unsigned char *in2, \ + const unsigned char *in3, const unsigned char *pub_seed, uint32_t addrx4[4 * 8], \ + const hash_state *state_seeded) { \ + unsigned char buf0[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; \ + unsigned char buf1[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; \ + unsigned char buf2[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; \ + unsigned char buf3[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; \ + \ + memcpy(buf0, pub_seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + memcpy(buf1, pub_seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + memcpy(buf2, pub_seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + memcpy(buf3, pub_seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(buf0 + 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, addrx4 + 0 * 8); \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(buf1 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, addrx4 + 1 * 8); \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(buf2 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, addrx4 + 2 * 8); \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_addr_to_bytes(buf3 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, addrx4 + 3 * 8); \ + memcpy(buf0 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES, in0, (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + memcpy(buf1 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES, in1, (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + memcpy(buf2 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES, in2, (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + memcpy(buf3 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES, in3, (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_shake256x4(out0, out1, out2, out3, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, buf0, buf1, buf2, buf3, \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + \ + /* Avoid unused parameter warning */ \ + (void)state_seeded; \ + } + +thashx4_variant(1, 1) +thashx4_variant(2, 2) +thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN) +thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_TREES) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thashx4.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thashx4.h new file mode 100644 index 00000000..51f197c1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/thashx4.h @@ -0,0 +1,25 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_THASHX4_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_THASHX4_H + +#include + +#include "hash_state.h" + +#define thashx4_header(inblocks) \ + void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thashx4_##inblocks(unsigned char *out0, \ + unsigned char *out1, \ + unsigned char *out2, \ + unsigned char *out3, \ + const unsigned char *in0, \ + const unsigned char *in1, \ + const unsigned char *in2, \ + const unsigned char *in3, \ + const unsigned char *pub_seed, uint32_t addrx4[4*8], \ + const hash_state *state_seeded) + +thashx4_header(1); +thashx4_header(2); +thashx4_header(WOTS_LEN); +thashx4_header(FORS_TREES); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utils.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utils.c new file mode 100644 index 00000000..9880740d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. */ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, leaf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } else { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. 
*/ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. 
*/ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. 
*/ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT + 
1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utils.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utils.h new file mode 100644 index 00000000..755ad4a7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utilsx4.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utilsx4.c new file mode 100644 index 00000000..f3dd41e1 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utilsx4.c @@ -0,0 +1,98 @@ +#include "address.h" +#include "params.h" +#include "thashx4.h" +#include "utils.h" +#include "utilsx4.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +#define treehashx4_variant(name, tree_height) \ + void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehashx4_##name( \ + unsigned char *rootx4, unsigned char *auth_pathx4, const unsigned char *sk_seed, \ + const unsigned char *pub_seed, const uint32_t leaf_idx[4], uint32_t idx_offset[4], \ + void (*gen_leafx4)(unsigned char * /* leaf0 */, unsigned char * /* leaf1 */, \ + unsigned char * /* leaf2 */, unsigned char * /* leaf3 */, \ + const unsigned char * /* sk_seed */, \ + const unsigned char * /* pub_seed */, uint32_t /* addr_idx0 */, \ + uint32_t /* addr_idx1 */, uint32_t /* addr_idx2 */, \ + uint32_t /* addr_idx3 */, const uint32_t[8] /* tree_addr */, \ + const hash_state * /* state_seeded */), \ + uint32_t tree_addrx4[4 * 8], const hash_state *state_seeded) { \ + unsigned char stackx4[4 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; \ + unsigned int heights[(tree_height) + 1]; \ + unsigned int offset = 0; \ + uint32_t idx; \ + uint32_t tree_idx; \ + unsigned int j; \ + \ + for (idx = 0; idx < (uint32_t)(1 << (tree_height)); idx++) { \ + /* Add the next leaf node to the stack. 
*/ \ + gen_leafx4(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + offset * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sk_seed, \ + pub_seed, idx + idx_offset[0], idx + idx_offset[1], idx + idx_offset[2], \ + idx + idx_offset[3], tree_addrx4, state_seeded); \ + offset++; \ + heights[offset - 1] = 0; \ + \ + /* If this is a node we need for the auth path.. */ \ + for (j = 0; j < 4; j++) { \ + if ((leaf_idx[j] ^ 0x1) == idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + } \ + } \ + \ + /* While the top-most nodes are of equal height.. */ \ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { \ + /* Compute index of the new node, in the next layer. */ \ + tree_idx = (idx >> (heights[offset - 1] + 1)); \ + \ + /* Set the address of the node we're creating. */ \ + for (j = 0; j < 4; j++) { \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_height(tree_addrx4 + j * 8, heights[offset - 1] + 1); \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_tree_index(tree_addrx4 + j * 8, \ + tree_idx + (idx_offset[j] >> (heights[offset - 1] + 1))); \ + } \ + /* Hash the top-most nodes from the stack together. 
*/ \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thashx4_2(stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 0 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 1 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 2 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + 3 * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 2) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + pub_seed, tree_addrx4, state_seeded); \ + offset--; \ + /* Note that the top-most node is now one layer higher. */ \ + heights[offset - 1]++; \ + \ + /* If this is a node we need for the auth path.. 
*/ \ + for (j = 0; j < 4; j++) { \ + if (((leaf_idx[j] >> heights[offset - 1]) ^ 0x1) == tree_idx) { \ + memcpy(auth_pathx4 + j * (tree_height)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + \ + heights[offset - 1] * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N + (offset - 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, \ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + } \ + } \ + } \ + } \ + \ + for (j = 0; j < 4; j++) { \ + memcpy(rootx4 + j * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, stackx4 + j * ((tree_height) + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); \ + } \ + } + +treehashx4_variant(FORS_HEIGHT, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_FORS_HEIGHT) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utilsx4.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utilsx4.h new file mode 100644 index 00000000..5bb8ba42 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/utilsx4.h @@ -0,0 +1,38 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_UTILSX4_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_UTILSX4_H + +#include "hash_state.h" +#include "params.h" + +#include + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_treehashx4_FORS_HEIGHT(unsigned char *rootx4, + unsigned char *auth_pathx4, + const unsigned char *sk_seed, + const unsigned char *pub_seed, + const uint32_t leaf_idx[4], + uint32_t idx_offset[4], + void (*gen_leafx4)(unsigned char * /* leaf0 */, + unsigned char * /* leaf1 */, + unsigned char * /* leaf2 */, + unsigned char * /* leaf3 */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx0 */, + uint32_t /* addr_idx1 */, + uint32_t /* addr_idx2 */, + uint32_t /* addr_idx3 */, + const uint32_t[8] /* tree_addr */, + const hash_state * /* state_seeded */), + uint32_t tree_addrx4[4 * 8], + const hash_state *state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/wots.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/wots.c new file mode 100644 index 00000000..1d05315a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/wots.c @@ -0,0 +1,240 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "hashx4.h" +#include "params.h" +#include "thash.h" +#include "thashx4.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], const hash_state *state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_hash_addr(wots_addr, 0); + + /* Generate sk element. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addr(sk, sk_seed, wots_addr, state_seeded); +} + +/** + * 4-way parallel version of wots_gen_sk; expects 4x as much space in sk + */ +static void wots_gen_skx4(unsigned char *skx4, const unsigned char *sk_seed, + uint32_t wots_addrx4[4 * 8], const hash_state *state_seeded) { + unsigned int j; + + /* Make sure that the hash address is actually zeroed. */ + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_hash_addr(wots_addrx4 + j * 8, 0); + } + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_prf_addrx4(skx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + skx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + skx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + skx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + sk_seed, wots_addrx4, + state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thash_1(out, out, pub_seed, addr, state_seeded); + } +} + +/** + * 4-way parallel version of gen_chain; expects 4x as much space in out, and + * 4x as much space in inx4. Assumes start and step identical across chains. 
+ */ +static void gen_chainx4(unsigned char *outx4, const unsigned char *inx4, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addrx4[4 * 8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + /* Initialize outx4 with the value at position 'start'. */ + memcpy(outx4, inx4, 4 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + + /* Iterate 'steps' calls to the hash function. */ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W; i++) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_hash_addr(addrx4 + j * 8, i); + } + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_thashx4_1(outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 0 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 1 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + outx4 + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + pub_seed, addrx4, + state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const int out_len, const unsigned char *input) { + int in = 0; + int out = 0; + unsigned char total = 0; + int bits = 0; + int consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LOGW; + output[out] = (unsigned int)(total >> bits) & (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W - 1); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). 
*/ +static void wots_checksum(unsigned int *csum_base_w, const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits. */ + csum = csum << (8 - ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LOGW) % 8)); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_ull_to_bytes(csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths. */ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_gen_pk(unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + uint32_t i; + unsigned int j; + + uint32_t addrx4[4 * 8]; + unsigned char pkbuf[4 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N]; + + for (j = 0; j < 4; j++) { + memcpy(addrx4 + j * 8, addr, sizeof(uint32_t) * 8); /* every lane starts as a copy of addr */ + } + + /* The last iteration typically does not have a complete set of 4 chains, + but because we use pkbuf, this is not an issue -- we still do as many + in parallel as possible. */ + for (i = 0; i < ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN + 3) & ~0x3); i += 4) { + for (j = 0; j < 4; j++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_chain_addr(addrx4 + j * 8, i + j); + } + wots_gen_skx4(pkbuf, sk_seed, addrx4, state_seeded); + gen_chainx4(pkbuf, pkbuf, 0, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W - 1, pub_seed, addrx4, state_seeded); + for (j = 0; j < 4; j++) { + if (i + j < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN) { /* skip padding lanes past the last real chain */ + memcpy(pk + (i + j)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, pkbuf + j * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N); + } + } + } + + // Get rid of unused argument variable. + (void)state_seeded; +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_sign(unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sk_seed, addr, state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, 0, lengths[i], pub_seed, addr, state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_pk_from_sig(unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_W - 1 - lengths[i], pub_seed, addr, + state_seeded); + } + + // avoid unused argument + (void)state_seeded; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/wots.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/wots.h new file mode 100644 index 00000000..f599671b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/avx2/wots.h @@ -0,0 +1,41 @@ +#ifndef 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include <stdint.h> + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'.
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_AVX2_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/LICENSE b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see + diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/Makefile.Microsoft_nmake b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/Makefile.Microsoft_nmake new file mode 100644 index 00000000..2a90fa69 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libsphincs-shake256-256s-simple_clean.lib +OBJECTS=address.obj wots.obj utils.obj fors.obj sign.obj hash_shake256.obj thash_shake256_simple.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/address.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/address.c new file mode 100644 index 00000000..5e16da61 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/address.c @@ -0,0 +1,78 @@ +#include + +#include "address.h" +#include "params.h" +#include "utils.h" + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]) { + int i; + + for (i = 0; i < 8; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ull_to_bytes( + bytes + i * 4, 4, addr[i]); + } +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer) { + addr[0] = layer; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], uint64_t tree) { + addr[1] = 0; + addr[2] = (uint32_t) (tree >> 32); + addr[3] = (uint32_t) tree; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type) { + addr[4] = type; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +/* These functions are used for OTS addresses. 
*/ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair) { + addr[5] = keypair; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]) { + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + out[5] = in[5]; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain) { + addr[6] = chain; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash) { + addr[7] = hash; +} + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height) { + addr[6] = tree_height; +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index) { + addr[7] = tree_index; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/address.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/address.h new file mode 100644 index 00000000..bd3d16c6 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/address.h @@ -0,0 +1,50 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDRESS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDRESS_H + +#include + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS 0 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK 1 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE 2 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE 3 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSPK 4 + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_addr_to_bytes( + unsigned char *bytes, const uint32_t addr[8]); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_layer_addr( + uint32_t addr[8], uint32_t layer); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_addr( + uint32_t addr[8], 
uint64_t tree); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + uint32_t addr[8], uint32_t type); + +/* Copies the layer and tree part of one address into the other */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_subtree_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for WOTS and FORS addresses. */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + uint32_t addr[8], uint32_t keypair); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_chain_addr( + uint32_t addr[8], uint32_t chain); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_hash_addr( + uint32_t addr[8], uint32_t hash); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + uint32_t out[8], const uint32_t in[8]); + +/* These functions are used for all hash tree addresses (including FORS). */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height( + uint32_t addr[8], uint32_t tree_height); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + uint32_t addr[8], uint32_t tree_index); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/api.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/api.h new file mode 100644 index 00000000..6ecab38b --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/api.h @@ -0,0 +1,81 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_API_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_API_H + +#include +#include + + + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_ALGNAME "SPHINCS+" + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES 128 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_BYTES 29792 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES 96 + + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void); + +/* 
+ * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_publickeybytes(void); + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_bytes(void); + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_seedbytes(void); + +/* + * Generates a SPHINCS+ key pair given a seed. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed); + +/* + * Generates a SPHINCS+ key pair. + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [root || PUB_SEED] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key. 
+ */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/fors.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/fors.c new file mode 100644 index 00000000..f11bf885 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/fors.c @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "address.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "thash.h" +#include "utils.h" + +static void fors_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t fors_leaf_addr[8], const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_prf_addr( + sk, sk_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_sk_to_leaf(unsigned char *leaf, const unsigned char *sk, + const unsigned char *pub_seed, + uint32_t fors_leaf_addr[8], + const hash_state *hash_state_seeded) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_1( + leaf, sk, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +static void fors_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t fors_tree_addr[8], + const hash_state *hash_state_seeded) { + uint32_t fors_leaf_addr[8] = {0}; + + /* Only copy the parts that must be kept in fors_leaf_addr. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + fors_leaf_addr, fors_tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + fors_leaf_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + fors_leaf_addr, addr_idx); + + fors_gen_sk(leaf, sk_seed, fors_leaf_addr, hash_state_seeded); + fors_sk_to_leaf(leaf, leaf, pub_seed, fors_leaf_addr, hash_state_seeded); +} + +/** + * Interprets m as PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT-bit unsigned integers. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES bits. + * Assumes indices has space for PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES integers. + */ +static void message_to_indices(uint32_t *indices, const unsigned char *m) { + unsigned int i, j; + unsigned int offset = 0; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES; i++) { + indices[i] = 0; + for (j = 0; j < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT; j++) { + indices[i] ^= (((uint32_t)m[offset >> 3] >> (offset & 0x7)) & 0x1) << j; + offset++; + } + } +} + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + fors_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height( + fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + fors_tree_addr, indices[i] + idx_offset); + + /* Include the secret key part that produces the selected leaf node. */ + fors_gen_sk(sig, sk_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + /* Compute the authentication path for this leaf node. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + roots + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, sig, sk_seed, pub_seed, + indices[i], idx_offset, fors_gen_leaf, fors_tree_addr, + hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_FORS_TREES( + pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded) { + uint32_t indices[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES]; + unsigned char roots[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + uint32_t fors_tree_addr[8] = {0}; + uint32_t fors_pk_addr[8] = {0}; + uint32_t idx_offset; + unsigned int i; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr(fors_tree_addr, fors_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr(fors_pk_addr, fors_addr); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type(fors_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type(fors_pk_addr, 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSPK); + + message_to_indices(indices, m); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES; i++) { + idx_offset = i * (1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height(fors_tree_addr, 0); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index(fors_tree_addr, indices[i] + idx_offset); + + /* Derive the leaf from the included secret key part. */ + fors_sk_to_leaf(leaf, sig, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + /* Derive the corresponding root node of this tree. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_compute_root(roots + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, leaf, indices[i], idx_offset, sig, + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT, pub_seed, fors_tree_addr, hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT; + } + + /* Hash horizontally across all tree roots to derive the public key. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_FORS_TREES(pk, roots, pub_seed, fors_pk_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/fors.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/fors.h new file mode 100644 index 00000000..d3164e71 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/fors.h @@ -0,0 +1,32 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_H + +#include + +#include "hash_state.h" +#include "params.h" + +/** + * Signs a message m, deriving the secret key from sk_seed and the FTS address. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES bits. 
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_fors_sign( + unsigned char *sig, unsigned char *pk, + const unsigned char *m, + const unsigned char *sk_seed, const unsigned char *pub_seed, + const uint32_t fors_addr[8], const hash_state *hash_state_seeded); + +/** + * Derives the FORS public key from a signature. + * This can be used for verification by comparing to a known public key, or to + * subsequently verify a signature on the derived public key. The latter is the + * typical use-case when used as an FTS below an OTS in a hypertree. + * Assumes m contains at least PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES bits. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_fors_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *m, + const unsigned char *pub_seed, const uint32_t fors_addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash.h new file mode 100644 index 00000000..d51755d7 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash.h @@ -0,0 +1,31 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_HASH_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_HASH_H + +#include "hash_state.h" + +#include +#include + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, + const unsigned char *pub_seed, const unsigned char *sk_seed); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_destroy_hash_function(hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned 
char *m, size_t mlen, + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash_shake256.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash_shake256.c new file mode 100644 index 00000000..2ef61e2a --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash_shake256.c @@ -0,0 +1,106 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "params.h" +#include "utils.h" + +#include "fips202.h" + +/* For SHAKE256, there is no immediate reason to initialize at the start, + so this function is an empty operation. */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_initialize_hash_function( + hash_state *hash_state_seeded, // NOLINT(readability-non-const-parameter) + const unsigned char *pub_seed, const unsigned char *sk_seed) { + (void)hash_state_seeded; /* Suppress an 'unused parameter' warning. */ + (void)pub_seed; /* Suppress an 'unused parameter' warning. */ + (void)sk_seed; /* Suppress an 'unused parameter' warning. 
*/ +} + +/* This is not necessary for SHAKE256, so we don't do anything */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_destroy_hash_function( + hash_state *hash_state_seeded) { // NOLINT(readability-non-const-parameter) + (void)hash_state_seeded; +} + +/* + * Computes PRF(key, addr), given a secret key of PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N bytes and an address + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_prf_addr( + unsigned char *out, const unsigned char *key, const uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES]; + + memcpy(buf, key, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, addr); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} + +/** + * Computes the message-dependent randomness R, using a secret seed and an + * optional randomization value as well as the message. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_gen_message_random( + unsigned char *R, + const unsigned char *sk_prf, const unsigned char *optrand, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk_prf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, optrand, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(R, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, &state); + shake256_inc_ctx_release(&state); + + (void)hash_state_seeded; /* Prevent unused parameter warning. 
*/ +} + +/** + * Computes the message hash using R, the public key, and the message. + * Outputs the message digest and the index of the leaf. The index is split in + * the tree index and the leaf index, for convenient copying to an address. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_hash_message( + unsigned char *digest, uint64_t *tree, uint32_t *leaf_idx, + const unsigned char *R, const unsigned char *pk, + const unsigned char *m, size_t mlen, + const hash_state *hash_state_seeded) { +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BITS (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT * (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D - 1)) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_LEAF_BITS PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_LEAF_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_LEAF_BITS + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_DGST_BYTES (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_MSG_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_LEAF_BYTES) + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_DGST_BYTES]; + unsigned char *bufp = buf; + shake256incctx state; + + shake256_inc_init(&state); + shake256_inc_absorb(&state, R, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + shake256_inc_absorb(&state, pk, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_PK_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(buf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_DGST_BYTES, &state); + shake256_inc_ctx_release(&state); + + memcpy(digest, bufp, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_MSG_BYTES); + bufp += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_MSG_BYTES; + + *tree = PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_bytes_to_ull( + bufp, 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BYTES); + *tree &= (~(uint64_t)0) >> (64 - PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BITS); + bufp += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_BYTES; + + *leaf_idx = (uint32_t)PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_bytes_to_ull( + bufp, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_LEAF_BYTES); + *leaf_idx &= (~(uint32_t)0) >> (32 - PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_LEAF_BITS); + + (void)hash_state_seeded; /* Prevent unused parameter warning. */ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash_state.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash_state.h new file mode 100644 index 00000000..7d92ef87 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/hash_state.h @@ -0,0 +1,30 @@ +#ifndef SPX_HASH_STATE_H +#define SPX_HASH_STATE_H + +/** + * Defines the type of the hash function state. + * + * Don't be fooled into thinking this instance of SPHINCS+ isn't stateless! + * + * From Section 7.2.2 from the SPHINCS+ round-2 specification: + * + * Each of the instances of the tweakable hash function take PK.seed as its + * first input, which is constant for a given key pair – and, thus, across + * a single signature. This leads to a lot of redundant computation. To remedy + * this, we pad PK.seed to the length of a full 64-byte SHA-256 input block. + * Because of the Merkle-Damgård construction that underlies SHA-256, this + * allows for reuse of the intermediate SHA-256 state after the initial call to + * the compression function which improves performance. + * + * We pass this hash state around in functions, because otherwise we need to + * have a global variable. + * + * SHAKE256 does not need this state. Because this implementation is generated + * from a shared code base, we still need to specify some hash_state as it is + * still passed around. We chose to use an `int` as a placeholder for this + * purpose. 
+ */ + +#define hash_state int + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/params.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/params.h new file mode 100644 index 00000000..41c787b0 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/params.h @@ -0,0 +1,53 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_PARAMS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_PARAMS_H + +/* Hash output length in bytes. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N 32 +/* Height of the hypertree. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FULL_HEIGHT 64 +/* Number of subtree layer. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D 8 +/* FORS tree dimensions. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT 14 +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES 22 +/* Winternitz parameter, */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_W 16 + +/* The hash function is defined by linking a different hash.c file, as opposed + to setting a #define constant. */ + +/* For clarity */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES 32 + +/* WOTS parameters. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LOGW 4 + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN1 (8 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N / PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LOGW) + +/* PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN2 is floor(log(len_1 * (w - 1)) / log(w)) + 1; we precompute */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN2 3 + +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN1 + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN2) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES + +/* Subtree size. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FULL_HEIGHT / PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D) + +/* FORS parameters. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_MSG_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES + 7) / 8) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_BYTES ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT + 1) * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_PK_BYTES PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + +/* Resulting SPX sizes. 
*/ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES +\ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FULL_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_PK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N) +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_SK_BYTES (2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_PK_BYTES) + +/* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. */ +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_OPTRAND_BYTES 32 + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/sign.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/sign.c new file mode 100644 index 00000000..2b88aadd --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/sign.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include "address.h" +#include "api.h" +#include "fors.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "randombytes.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +/** + * Computes the leaf at a given address. First generates the WOTS key pair, + * then computes leaf by hashing horizontally. 
+ */ +static void wots_gen_leaf(unsigned char *leaf, const unsigned char *sk_seed, + const unsigned char *pub_seed, + uint32_t addr_idx, const uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + unsigned char pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES]; + uint32_t wots_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, addr_idx); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_gen_pk( + pk, sk_seed, pub_seed, wots_addr, hash_state_seeded); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, pk, pub_seed, wots_pk_addr, hash_state_seeded); +} + +/* + * Returns the length of a secret key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_secretkeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SECRETKEYBYTES; +} + +/* + * Returns the length of a public key, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_publickeybytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_PUBLICKEYBYTES; +} + +/* + * Returns the length of a signature, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_bytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_BYTES; +} + +/* + * Returns the length of the seed required to generate a key pair, in bytes + */ +size_t PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_seedbytes(void) { + return PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES; +} + +/* + * Generates an SPX key pair given 
a seed of length + * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_seed_keypair( + uint8_t *pk, uint8_t *sk, const uint8_t *seed) { + /* We do not need the auth path in key generation, but it simplifies the + code to have just one treehash routine that computes both root and path + in one function. */ + unsigned char auth_path[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + uint32_t top_tree_addr[8] = {0}; + hash_state hash_state_seeded; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_layer_addr( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D - 1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + top_tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Initialize SK_SEED, SK_PRF and PUB_SEED from seed. */ + memcpy(sk, seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + + memcpy(pk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_initialize_hash_function(&hash_state_seeded, pk, sk); + + /* Compute root node of the top-most subtree. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + sk + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, auth_path, sk, sk + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, 0, 0, + wots_gen_leaf, top_tree_addr, &hash_state_seeded); + + memcpy(pk + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, sk + 3 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/* + * Generates an SPX key pair. 
+ * Format sk: [SK_SEED || SK_PRF || PUB_SEED || root] + * Format pk: [PUB_SEED || root] + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk) { + unsigned char seed[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES]; + randombytes(seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_CRYPTO_SEEDBYTES); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_seed_keypair( + pk, sk, seed); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + const unsigned char *sk_seed = sk; + const unsigned char *sk_prf = sk + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + const unsigned char *pk = sk + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + const unsigned char *pub_seed = pk; + + unsigned char optrand[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + uint32_t i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + + hash_state hash_state_seeded; + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, sk_seed); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + + /* Optionally, signing can be made non-deterministic using optrand. + This can help counter side-channel attacks that would benefit from + getting a large number of traces when the signer uses the same nodes. 
*/ + randombytes(optrand, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + /* Compute the digest randomization value. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_gen_message_random( + sig, sk_prf, optrand, m, mlen, &hash_state_seeded); + + /* Derive the message digest and leaf index from R, PK and M. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Sign the message hash using FORS. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_fors_sign( + sig, root, mhash, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_BYTES; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + /* Compute a WOTS signature. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_sign( + sig, root, sk_seed, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the authentication path for the used WOTS leaf. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + root, sig, sk_seed, pub_seed, idx_leaf, 0, + wots_gen_leaf, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. 
*/ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT; + } + + *siglen = PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + const unsigned char *pub_seed = pk; + const unsigned char *pub_root = pk + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + unsigned char mhash[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_MSG_BYTES]; + unsigned char wots_pk[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES]; + unsigned char root[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + unsigned char leaf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + unsigned int i; + uint64_t tree; + uint32_t idx_leaf; + uint32_t wots_addr[8] = {0}; + uint32_t tree_addr[8] = {0}; + uint32_t wots_pk_addr[8] = {0}; + + hash_state hash_state_seeded; + + if (siglen != PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES) { + return -1; + } + + /* This hook allows the hash function instantiation to do whatever + preparation or computation it needs, based on the public seed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_initialize_hash_function( + &hash_state_seeded, + pub_seed, NULL); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + wots_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTS); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + tree_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_type( + wots_pk_addr, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_WOTSPK); + + /* Derive the message digest and leaf index from R || PK || M. 
*/ + /* The additional PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N is a result of the hash domain separator. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_hash_message( + mhash, &tree, &idx_leaf, sig, pk, m, mlen, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + /* Layer correctly defaults to 0, so no need to set_layer_addr */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_addr(wots_addr, tree); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_fors_pk_from_sig( + root, sig, mhash, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_BYTES; + + /* For each subtree.. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_D; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_layer_addr(tree_addr, i); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_addr(tree_addr, tree); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_subtree_addr( + wots_addr, tree_addr); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_keypair_addr( + wots_addr, idx_leaf); + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_copy_keypair_addr( + wots_pk_addr, wots_addr); + + /* The WOTS public key is only correct if the signature was correct. */ + /* Initially, root is the FORS pk, but on subsequent iterations it is + the root of the subtree below the currently processed subtree. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_pk_from_sig( + wots_pk, sig, root, pub_seed, wots_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_BYTES; + + /* Compute the leaf node using the WOTS public key. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_WOTS_LEN( + leaf, wots_pk, pub_seed, wots_pk_addr, &hash_state_seeded); + + /* Compute the root node of this subtree. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_compute_root( + root, leaf, idx_leaf, 0, sig, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT, + pub_seed, tree_addr, &hash_state_seeded); + sig += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + /* Update the indices for the next layer. */ + idx_leaf = (tree & ((1 << PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT) - 1)); + tree = tree >> PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT; + } + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_destroy_hash_function(&hash_state_seeded); + /* Check if the root node equals the root node in the public key. */ + if (memcmp(root, pub_root, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N) != 0) { + return -1; + } + + return 0; +} + + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but SPHINCS+ signatures are always exactly PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES. 
*/ + if (smlen < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES; + + if (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_crypto_sign_verify( + sm, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES, sm + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_BYTES, *mlen); + + return 0; +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/thash.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/thash.h new file mode 100644 index 00000000..62ccfe2d --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/thash.h @@ -0,0 +1,28 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_THASH_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_THASH_H + +#include "hash_state.h" + +#include + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/thash_shake256_simple.c 
b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/thash_shake256_simple.c new file mode 100644 index 00000000..274cfc92 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/thash_shake256_simple.c @@ -0,0 +1,74 @@ +#include +#include + +#include "address.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" + +#include "fips202.h" + +/** + * Takes an array of inblocks concatenated arrays of PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N bytes. + */ +static void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash( + unsigned char *out, unsigned char *buf, + const unsigned char *in, unsigned int inblocks, + const unsigned char *pub_seed, uint32_t addr[8]) { + + memcpy(buf, pub_seed, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_addr_to_bytes(buf + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, addr); + memcpy(buf + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES, in, inblocks * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + + shake256(out, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, buf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_1( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES + 1 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash( + out, buf, in, 1, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_2( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES + 2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash( + out, buf, in, 2, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_WOTS_LEN( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. */ +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_FORS_TREES( + unsigned char *out, const unsigned char *in, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + + unsigned char buf[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_BYTES + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash( + out, buf, in, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_TREES, pub_seed, addr); + + (void)hash_state_seeded; /* Avoid unused parameter warning. 
*/ +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/utils.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/utils.c new file mode 100644 index 00000000..e61290d3 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/utils.c @@ -0,0 +1,199 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in) { + + /* Iterate over out in decreasing order, for big-endianness. */ + for (size_t i = outlen; i > 0; i--) { + out[i - 1] = in & 0xff; + in = in >> 8; + } +} + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen) { + unsigned long long retval = 0; + + for (size_t i = 0; i < inlen; i++) { + retval |= ((unsigned long long)in[i]) << (8 * (inlen - 1 - i)); + } + return retval; +} + +/** + * Computes a root node given a leaf and an auth path. + * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + unsigned char buffer[2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + + /* If leaf_idx is odd (last bit = 1), current path element is a right child + and auth_path has to go left. Otherwise it is the other way around. 
*/ + if (leaf_idx & 1) { + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, leaf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + } else { + memcpy(buffer, leaf, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + + for (i = 0; i < tree_height - 1; i++) { + leaf_idx >>= 1; + idx_offset >>= 1; + /* Set the address of the node we're creating. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height(addr, i + 1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + + /* Pick the right or left neighbor, depending on parity of the node. */ + if (leaf_idx & 1) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_2( + buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + } else { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_2( + buffer, buffer, pub_seed, addr, hash_state_seeded); + memcpy(buffer + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, auth_path, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + } + auth_path += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N; + } + + /* The last iteration is exceptional; we do not copy an auth_path node. */ + leaf_idx >>= 1; + idx_offset >>= 1; + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height(addr, tree_height); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + addr, leaf_idx + idx_offset); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_2( + root, buffer, pub_seed, addr, hash_state_seeded); +} + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. 
+ * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +static void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash( + unsigned char *root, unsigned char *auth_path, + unsigned char *stack, unsigned int *heights, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, uint32_t tree_height, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], + const hash_state *hash_state_seeded) { + + unsigned int offset = 0; + uint32_t idx; + uint32_t tree_idx; + + for (idx = 0; idx < (uint32_t)(1 << tree_height); idx++) { + /* Add the next leaf node to the stack. */ + gen_leaf(stack + offset * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, + sk_seed, pub_seed, idx + idx_offset, tree_addr, + hash_state_seeded); + offset++; + heights[offset - 1] = 0; + + /* If this is a node we need for the auth path.. */ + if ((leaf_idx ^ 0x1) == idx) { + memcpy(auth_path, stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + } + + /* While the top-most nodes are of equal height.. */ + while (offset >= 2 && heights[offset - 1] == heights[offset - 2]) { + /* Compute index of the new node, in the next layer. */ + tree_idx = (idx >> (heights[offset - 1] + 1)); + + /* Set the address of the node we're creating. 
*/ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_height( + tree_addr, heights[offset - 1] + 1); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_tree_index( + tree_addr, tree_idx + (idx_offset >> (heights[offset - 1] + 1))); + /* Hash the top-most nodes from the stack together. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_2( + stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, stack + (offset - 2)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, + pub_seed, tree_addr, hash_state_seeded); + offset--; + /* Note that the top-most node is now one layer higher. */ + heights[offset - 1]++; + + /* If this is a node we need for the auth path.. */ + if (((leaf_idx >> heights[offset - 1]) ^ 0x1) == tree_idx) { + memcpy(auth_path + heights[offset - 1]*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, + stack + (offset - 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + } + } + } + memcpy(root, stack, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); +} + +/* The wrappers below ensure that we use fixed-size buffers on the stack */ + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_FORS_HEIGHT, 
gen_leaf, tree_addr, hash_state_seeded); +} + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded) { + + unsigned char stack[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT + 1)*PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N]; + unsigned int heights[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT + 1]; + + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash( + root, auth_path, stack, heights, sk_seed, pub_seed, + leaf_idx, idx_offset, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_TREE_HEIGHT, gen_leaf, tree_addr, hash_state_seeded); +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/utils.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/utils.h new file mode 100644 index 00000000..81157a31 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/utils.h @@ -0,0 +1,64 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_UTILS_H +#define PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_UTILS_H + +#include "hash_state.h" +#include "params.h" +#include +#include + +/** + * Converts the value of 'in' to 'outlen' bytes in big-endian byte order. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ull_to_bytes( + unsigned char *out, size_t outlen, unsigned long long in); + +/** + * Converts the inlen bytes in 'in' from big-endian byte order to an integer. + */ +unsigned long long PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_bytes_to_ull( + const unsigned char *in, size_t inlen); + +/** + * Computes a root node given a leaf and an auth path. 
+ * Expects address to be complete other than the tree_height and tree_index. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_compute_root( + unsigned char *root, const unsigned char *leaf, + uint32_t leaf_idx, uint32_t idx_offset, + const unsigned char *auth_path, uint32_t tree_height, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * For a given leaf index, computes the authentication path and the resulting + * root node using Merkle's TreeHash algorithm. + * Expects the layer and tree parts of the tree_addr to be set, as well as the + * tree type (i.e. PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_HASHTREE or PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ADDR_TYPE_FORSTREE). + * Applies the offset idx_offset to indices before building addresses, so that + * it is possible to continue counting indices across trees. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_FORS_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_treehash_TREE_HEIGHT( + unsigned char *root, unsigned char *auth_path, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t leaf_idx, uint32_t idx_offset, + void (*gen_leaf)( + unsigned char * /* leaf */, + const unsigned char * /* sk_seed */, + const unsigned char * /* pub_seed */, + uint32_t /* addr_idx */, const uint32_t[8] /* tree_addr */, + const hash_state * /* hash_state_seeded */), + uint32_t tree_addr[8], const hash_state *hash_state_seeded); + +#endif diff --git 
a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/wots.c b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/wots.c new file mode 100644 index 00000000..2e644403 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/wots.c @@ -0,0 +1,167 @@ +#include +#include + +#include "address.h" +#include "hash.h" +#include "hash_state.h" +#include "params.h" +#include "thash.h" +#include "utils.h" +#include "wots.h" + +// TODO clarify address expectations, and make them more uniform. +// TODO i.e. do we expect types to be set already? +// TODO and do we expect modifications or copies? + +/** + * Computes the starting value for a chain, i.e. the secret key. + * Expects the address to be complete up to the chain address. + */ +static void wots_gen_sk(unsigned char *sk, const unsigned char *sk_seed, + uint32_t wots_addr[8], + const hash_state *hash_state_seeded) { + /* Make sure that the hash address is actually zeroed. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_hash_addr(wots_addr, 0); + + /* Generate sk element. */ + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_prf_addr(sk, sk_seed, wots_addr, hash_state_seeded); +} + +/** + * Computes the chaining function. + * out and in have to be n-byte arrays. + * + * Interprets in as start-th value of the chain. + * addr has to contain the address of the chain. + */ +static void gen_chain(unsigned char *out, const unsigned char *in, + unsigned int start, unsigned int steps, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + /* Initialize out with the value at position 'start'. */ + memcpy(out, in, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N); + + /* Iterate 'steps' calls to the hash function. 
*/ + for (i = start; i < (start + steps) && i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_W; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_hash_addr(addr, i); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_thash_1( + out, out, pub_seed, addr, hash_state_seeded); + } +} + +/** + * base_w algorithm as described in draft. + * Interprets an array of bytes as integers in base w. + * This only works when log_w is a divisor of 8. + */ +static void base_w(unsigned int *output, const size_t out_len, + const unsigned char *input) { + size_t in = 0; + size_t out = 0; + unsigned char total = 0; + unsigned int bits = 0; + size_t consumed; + + for (consumed = 0; consumed < out_len; consumed++) { + if (bits == 0) { + total = input[in]; + in++; + bits += 8; + } + bits -= PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LOGW; + output[out] = (unsigned int)((total >> bits) & (PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_W - 1)); + out++; + } +} + +/* Computes the WOTS+ checksum over a message (in base_w). */ +static void wots_checksum(unsigned int *csum_base_w, + const unsigned int *msg_base_w) { + unsigned int csum = 0; + unsigned char csum_bytes[(PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LOGW + 7) / 8]; + unsigned int i; + + /* Compute checksum. */ + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN1; i++) { + csum += PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_W - 1 - msg_base_w[i]; + } + + /* Convert checksum to base_w. */ + /* Make sure expected empty zero bits are the least significant bits; the outer % 8 makes the shift zero when the checksum fills its bytes exactly. */ + csum = csum << ((8 - ((PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN2 * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LOGW) % 8)) % 8); + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_ull_to_bytes( + csum_bytes, sizeof(csum_bytes), csum); + base_w(csum_base_w, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN2, csum_bytes); +} + +/* Takes a message and derives the matching chain lengths.
*/ +static void chain_lengths(unsigned int *lengths, const unsigned char *msg) { + base_w(lengths, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN1, msg); + wots_checksum(lengths + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN1, lengths); +} + +/** + * WOTS key generation. Takes a 32 byte sk_seed, expands it to WOTS private key + * elements and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + uint32_t i; + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(pk + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, pk + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, + 0, PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_W - 1, pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes an n-byte message and the 32-byte sk_seed to compute a signature 'sig'.
+ */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_chain_addr(addr, i); + wots_gen_sk(sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, sk_seed, addr, hash_state_seeded); + gen_chain(sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, 0, lengths[i], pub_seed, addr, hash_state_seeded); + } +} + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded) { + unsigned int lengths[PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN]; + uint32_t i; + + chain_lengths(lengths, msg); + + for (i = 0; i < PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_LEN; i++) { + PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_set_chain_addr(addr, i); + gen_chain(pk + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, sig + i * PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_N, + lengths[i], PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_W - 1 - lengths[i], pub_seed, addr, + hash_state_seeded); + } +} diff --git a/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/wots.h b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/wots.h new file mode 100644 index 00000000..fe5533a4 --- /dev/null +++ b/crypto_sign/sphincs/sphincs-shake256-256s-simple/clean/wots.h @@ -0,0 +1,41 @@ +#ifndef PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_H +#define 
PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_WOTS_H + +#include "hash_state.h" +#include "params.h" +#include + +/** + * WOTS key generation. Takes a 32 byte seed for the private key, expands it to + * a full WOTS private key and computes the corresponding public key. + * It requires the seed pub_seed (used to generate bitmasks and hash keys) + * and the address of this WOTS key pair. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_gen_pk( + unsigned char *pk, const unsigned char *sk_seed, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +/** + * Takes an n-byte message and the 32-byte seed for the private key to compute a + * signature that is placed at 'sig'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_sign( + unsigned char *sig, const unsigned char *msg, + const unsigned char *sk_seed, const unsigned char *pub_seed, + uint32_t addr[8], const hash_state *hash_state_seeded); + +/** + * Takes a WOTS signature and an n-byte message, computes a WOTS public key. + * + * Writes the computed public key to 'pk'. + */ +void PQCLEAN_SPHINCSSHAKE256256SSIMPLE_CLEAN_wots_pk_from_sig( + unsigned char *pk, + const unsigned char *sig, const unsigned char *msg, + const unsigned char *pub_seed, uint32_t addr[8], + const hash_state *hash_state_seeded); + +#endif