From 8e27bd091573b5088d69cb33531475e98b312483 Mon Sep 17 00:00:00 2001 From: Thom Wiggers Date: Fri, 26 Jun 2020 08:01:23 +0200 Subject: [PATCH] Add MQDSS AVX2 implementations (#288) * Add AVX2 version of mqdss * Fix duplicate consistency --- crypto_sign/mqdss-48/META.yml | 9 + crypto_sign/mqdss-48/avx2/LICENSE | 116 ++++++ crypto_sign/mqdss-48/avx2/Makefile | 22 + .../mqdss-48/avx2/Makefile.Microsoft_nmake | 19 + crypto_sign/mqdss-48/avx2/api.h | 47 +++ crypto_sign/mqdss-48/avx2/gf31.c | 123 ++++++ crypto_sign/mqdss-48/avx2/gf31.h | 36 ++ crypto_sign/mqdss-48/avx2/mq.c | 251 +++++++++++ crypto_sign/mqdss-48/avx2/mq.h | 18 + crypto_sign/mqdss-48/avx2/params.h | 25 ++ crypto_sign/mqdss-48/avx2/sign.c | 389 ++++++++++++++++++ crypto_sign/mqdss-48/clean/sign.c | 1 - crypto_sign/mqdss-64/META.yml | 9 + crypto_sign/mqdss-64/avx2/LICENSE | 116 ++++++ crypto_sign/mqdss-64/avx2/Makefile | 22 + .../mqdss-64/avx2/Makefile.Microsoft_nmake | 19 + crypto_sign/mqdss-64/avx2/api.h | 47 +++ crypto_sign/mqdss-64/avx2/gf31.c | 128 ++++++ crypto_sign/mqdss-64/avx2/gf31.h | 36 ++ crypto_sign/mqdss-64/avx2/mq.c | 239 +++++++++++ crypto_sign/mqdss-64/avx2/mq.h | 18 + crypto_sign/mqdss-64/avx2/params.h | 25 ++ crypto_sign/mqdss-64/avx2/sign.c | 389 ++++++++++++++++++ crypto_sign/mqdss-64/clean/sign.c | 1 - test/duplicate_consistency/mqdss-48_clean.yml | 20 + test/duplicate_consistency/mqdss-64_clean.yml | 11 + test/test_testvectors.py | 1 + 27 files changed, 2135 insertions(+), 2 deletions(-) create mode 100644 crypto_sign/mqdss-48/avx2/LICENSE create mode 100644 crypto_sign/mqdss-48/avx2/Makefile create mode 100644 crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake create mode 100644 crypto_sign/mqdss-48/avx2/api.h create mode 100644 crypto_sign/mqdss-48/avx2/gf31.c create mode 100644 crypto_sign/mqdss-48/avx2/gf31.h create mode 100644 crypto_sign/mqdss-48/avx2/mq.c create mode 100644 crypto_sign/mqdss-48/avx2/mq.h create mode 100644 crypto_sign/mqdss-48/avx2/params.h create mode 100644 
crypto_sign/mqdss-48/avx2/sign.c create mode 100644 crypto_sign/mqdss-64/avx2/LICENSE create mode 100644 crypto_sign/mqdss-64/avx2/Makefile create mode 100644 crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake create mode 100644 crypto_sign/mqdss-64/avx2/api.h create mode 100644 crypto_sign/mqdss-64/avx2/gf31.c create mode 100644 crypto_sign/mqdss-64/avx2/gf31.h create mode 100644 crypto_sign/mqdss-64/avx2/mq.c create mode 100644 crypto_sign/mqdss-64/avx2/mq.h create mode 100644 crypto_sign/mqdss-64/avx2/params.h create mode 100644 crypto_sign/mqdss-64/avx2/sign.c create mode 100644 test/duplicate_consistency/mqdss-48_clean.yml diff --git a/crypto_sign/mqdss-48/META.yml b/crypto_sign/mqdss-48/META.yml index 9789d348..0fba019d 100644 --- a/crypto_sign/mqdss-48/META.yml +++ b/crypto_sign/mqdss-48/META.yml @@ -16,3 +16,12 @@ auxiliary-submitters: implementations: - name: clean version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 + - name: avx2 + version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 + - architecture: x86 + required_flags: + - avx2 diff --git a/crypto_sign/mqdss-48/avx2/LICENSE b/crypto_sign/mqdss-48/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/mqdss-48/avx2/Makefile b/crypto_sign/mqdss-48/avx2/Makefile new file mode 100644 index 00000000..af26c1f7 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/Makefile @@ -0,0 +1,22 @@ +# This Makefile can be used with GNU Make or BSD Make + +LIB=libmqdss-48_avx2.a + +HEADERS = params.h gf31.h mq.h api.h +OBJECTS = gf31.o mq.o sign.o + +CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror \ + -Wmissing-prototypes -Wredundant-decls -std=c99 -mavx2 \ + -I../../../common $(EXTRAFLAGS) + +all: $(LIB) + +%.o: %.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJECTS) + $(AR) -r $@ $(OBJECTS) + +clean: + $(RM) $(OBJECTS) + $(RM) $(LIB) diff --git a/crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake b/crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..07d51db5 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libmqdss-48_avx2.lib +OBJECTS=gf31.obj mq.obj sign.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /arch:AVX2 + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/mqdss-48/avx2/api.h b/crypto_sign/mqdss-48/avx2/api.h new file mode 100644 index 00000000..82109189 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/api.h @@ -0,0 +1,47 @@ +#ifndef PQCLEAN_MQDSS48_AVX2_API_H +#define PQCLEAN_MQDSS48_AVX2_API_H + +#include <stddef.h> +#include <stdint.h> + +#define PQCLEAN_MQDSS48_AVX2_CRYPTO_ALGNAME "MQDSS-48" + +#define PQCLEAN_MQDSS48_AVX2_CRYPTO_SECRETKEYBYTES 16 +#define PQCLEAN_MQDSS48_AVX2_CRYPTO_PUBLICKEYBYTES 46 +#define PQCLEAN_MQDSS48_AVX2_CRYPTO_BYTES 28400 + +/* + * Generates an MQDSS key pair. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key.
+ */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/mqdss-48/avx2/gf31.c b/crypto_sign/mqdss-48/avx2/gf31.c new file mode 100644 index 00000000..1a456e54 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/gf31.c @@ -0,0 +1,123 @@ +#include "params.h" +#include "fips202.h" +#include "gf31.h" +#include <immintrin.h> +#include <stdint.h> +#include <string.h> + +/* Given a vector of N elements in the range [0, 31], this reduces the elements + to the range [0, 30] by mapping 31 to 0 (i.e. reduction mod 31) */ +void PQCLEAN_MQDSS48_AVX2_vgf31_unique(gf31 *out, gf31 *in) { + __m256i x; + __m256i _w31 = _mm256_set1_epi16(31); + int i; + + for (i = 0; i < (N >> 4); ++i) { + x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); + x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); + _mm256_storeu_si256((__m256i *)(out + i * 16), x); + } +} + +/* This function acts on vectors with N gf31 elements. +It performs one reduction step and guarantees output in [0, 30], +but requires input to be in [0, 32768).
*/ +void PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in) { + __m256i x; + __m256i _w2114 = _mm256_set1_epi32(2114 * 65536 + 2114); + __m256i _w31 = _mm256_set1_epi16(31); + int i; + + for (i = 0; i < (N >> 4); ++i) { + x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); + x = _mm256_sub_epi16(x, _mm256_mullo_epi16(_w31, _mm256_mulhi_epi16(x, _w2114))); + x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); + _mm256_storeu_si256((__m256i *)(out + i * 16), x); + } +} + +/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places + them in a vector of 16-bit elements */ +void PQCLEAN_MQDSS48_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen) { + size_t i = 0, j; + shake256ctx shakestate; + uint8_t shakeblock[SHAKE256_RATE]; + + shake256_absorb(&shakestate, seed, seedlen); + + while (i < len) { + shake256_squeezeblocks(shakeblock, 1, &shakestate); + for (j = 0; j < SHAKE256_RATE && i < len; j++) { + if ((shakeblock[j] & 31) != 31) { + out[i] = (shakeblock[j] & 31); + i++; + } + } + } + shake256_ctx_release(&shakestate); +} + +/* Given a seed, samples len gf31 elements, transposed into signed range, + i.e. in the range [-15, 15], and places them in an array of 8-bit integers. + This is used for the expansion of F, which wants packed elements. */ +void PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen) { + size_t i = 0, j; + shake256ctx shakestate; + uint8_t shakeblock[SHAKE256_RATE]; + + shake256_absorb(&shakestate, seed, seedlen); + + while (i < len) { + shake256_squeezeblocks(shakeblock, 1, &shakestate); + for (j = 0; j < SHAKE256_RATE && i < len; j++) { + if ((shakeblock[j] & 31) != 31) { + out[i] = (signed char)((shakeblock[j] & 31) - 15); + i++; + } + } + } + shake256_ctx_release(&shakestate); + +} + +/* Unpacks an array of packed GF31 elements to one element per gf31.
+ Assumes that there is sufficient empty space available at the end of the + array to unpack. Can perform in-place. */ +void PQCLEAN_MQDSS48_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n) { + size_t i; + size_t j = ((n * 5) >> 3) - 1; + unsigned int d = 0; + + for (i = n; i > 0; i--) { + out[i - 1] = (gf31)((in[j] >> d) & 31); + d += 5; + if (d > 8) { + d -= 8; + j--; + out[i - 1] = (gf31)(out[i - 1] ^ ((in[j] << (5 - d)) & 31)); + } + } +} + +/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values. + Assumes that there is sufficient space available to pack. + Can perform in-place. */ +void PQCLEAN_MQDSS48_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n) { + unsigned int i = 0; + unsigned int j; + int d = 3; + + /* There will be ceil(5n / 8) output blocks */ + memset(out, 0, (size_t)((5 * n + 7) & ~7U) >> 3); + + for (j = 0; j < n; j++) { + if (d < 0) { + d += 8; + out[i] = (uint8_t)((out[i] & (255 << (d - 3))) | + ((in[j] >> (8 - d)) & ~(255 << (d - 3)))); + i++; + } + out[i] = (uint8_t)((out[i] & ~(31 << d)) | ((in[j] << d) & (31 << d))); + d -= 5; + } +} diff --git a/crypto_sign/mqdss-48/avx2/gf31.h b/crypto_sign/mqdss-48/avx2/gf31.h new file mode 100644 index 00000000..91ef43b8 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/gf31.h @@ -0,0 +1,36 @@ +#ifndef MQDSS_GF31_H +#define MQDSS_GF31_H + +#include <stddef.h> +#include <stdint.h> + +typedef unsigned short gf31; + +/* Given a vector of elements in the range [0, 31], this reduces the elements + to the range [0, 30] by mapping 31 to 0 (i.e. reduction mod 31) */ +void PQCLEAN_MQDSS48_AVX2_vgf31_unique(gf31 *out, gf31 *in); + +/* Given a vector of 16-bit integers (i.e.
in [0, 32767]), this reduces the + elements to the range [0, 30] with one reduction step followed by mapping 31 to 0 (i.e. reduction mod 31) */ +void PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in); + +/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places + them in a vector of 16-bit elements */ +void PQCLEAN_MQDSS48_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen); + +/* Given a seed, samples len gf31 elements, transposed into signed range, + i.e. in the range [-15, 15], and places them in an array of 8-bit integers. + This is used for the expansion of F, which wants packed elements. */ +void PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen); + +/* Unpacks an array of packed GF31 elements to one element per gf31. + Assumes that there is sufficient empty space available at the end of the + array to unpack. Can perform in-place. */ +void PQCLEAN_MQDSS48_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n); + +/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values. + Assumes that there is sufficient space available to pack. + Can perform in-place.
*/ +void PQCLEAN_MQDSS48_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n); + +#endif diff --git a/crypto_sign/mqdss-48/avx2/mq.c b/crypto_sign/mqdss-48/avx2/mq.c new file mode 100644 index 00000000..3eb9cd3c --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/mq.c @@ -0,0 +1,251 @@ +#include "mq.h" +#include "params.h" +#include +#include + +static inline __m256i reduce_16(__m256i r, __m256i _w31, __m256i _w2114) { + __m256i exp = _mm256_mulhi_epi16(r, _w2114); + return _mm256_sub_epi16(r, _mm256_mullo_epi16(_w31, exp)); +} + +/* Computes all products x_i * x_j, returns in reduced form */ +inline static +void generate_quadratic_terms( unsigned char *xij, const gf31 *x ) { + __m256i mask_2114 = _mm256_set1_epi16( 2114 ); + __m256i mask_31 = _mm256_set1_epi16( 31 ); + __m256i xi[4]; + xi[0] = _mm256_loadu_si256((__m256i const *) (x)); + xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); + xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); + xi[3] = _mm256_setzero_si256(); + + __m256i xixj[4]; + xixj[0] = _mm256_setzero_si256(); + xixj[1] = _mm256_setzero_si256(); + xixj[2] = _mm256_setzero_si256(); + xixj[3] = _mm256_setzero_si256(); + + int k = 0; + for (int i = 0; i < 32; i++) { + __m256i br_xi = _mm256_set1_epi16( (short)x[i] ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); + r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r ); + k += i + 1; + } + + for (int i = 32; i < N; i++) { + __m256i br_xi = _mm256_set1_epi16( (short)x[i] ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); + r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); + __m256i r1 = 
_mm256_packs_epi16(xixj[2], xixj[3]); + r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); + k += i + 1; + } +} + +/* Computes all terms (x_i * y_j) + (x_j * y_i), returns in reduced form */ +inline static +void generate_xiyj_p_xjyi_terms( unsigned char *xij, const gf31 *x, const gf31 *y ) { + __m256i mask_2114 = _mm256_set1_epi16( 2114 ); + __m256i mask_31 = _mm256_set1_epi16( 31 ); + __m256i xiyi[4]; + xiyi[0] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y)), 1 )); + xiyi[1] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 16)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 16)), 1 )); + xiyi[2] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 32)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 32)), 1 )); + xiyi[3] = _mm256_setzero_si256(); + + __m256i xixj[4]; + xixj[0] = _mm256_setzero_si256(); + xixj[1] = _mm256_setzero_si256(); + xixj[2] = _mm256_setzero_si256(); + xixj[3] = _mm256_setzero_si256(); + + int k = 0; + for (int i = 0; i < 32; i++) { + __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); + r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r ); + k += i + 1; + } + + for (int i = 32; i < N; i++) { + __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); + r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); + __m256i r1 = 
_mm256_packs_epi16(xixj[2], xixj[3]); + r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); + k += i + 1; + } +} + +#define EVAL_YMM_0(xx) {\ + __m128i tmp = _mm256_castsi256_si128(xx); \ + for (int macro_i = 0; macro_i < 8; macro_i++) { \ + __m256i _xi = _mm256_broadcastw_epi16(tmp); \ + tmp = _mm_srli_si128(tmp, 2); \ + for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ + __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ + F += 32; \ + yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ + } \ + } \ + } + +#define EVAL_YMM_1(xx) {\ + __m128i tmp = _mm256_extracti128_si256(xx, 1); \ + for (int macro_i = 0; macro_i < 8; macro_i++) { \ + __m256i _xi = _mm256_broadcastw_epi16(tmp); \ + tmp = _mm_srli_si128(tmp, 2); \ + for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ + __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ + F += 32; \ + yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ + } \ + } \ + } + +#define REDUCE_(yy) { \ + (yy)[0] = reduce_16((yy)[0], mask_reduce, mask_2114); \ + (yy)[1] = reduce_16((yy)[1], mask_reduce, mask_2114); \ + (yy)[2] = reduce_16((yy)[2], mask_reduce, mask_2114); \ + } + +/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be + in reduced 5-bit representation). Expects the coefficients in F to be in + signed representation (i.e. [-15, 15], packed bytewise). + Outputs M gf31 elements in unique 16-bit representation as fx. 
*/ +void PQCLEAN_MQDSS48_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F) { + __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); + __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); + + __m256i xi[4]; + xi[0] = _mm256_loadu_si256((__m256i const *) (x)); + xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); + xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); + xi[3] = _mm256_setzero_si256(); + + __m256i _zero = _mm256_setzero_si256(); + xi[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[0])), xi[0]); + xi[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[1])), xi[1]); + xi[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[2])), xi[2]); + + __m256i x1 = _mm256_packs_epi16(xi[0], xi[1]); + x1 = _mm256_permute4x64_epi64(x1, 0xd8); // 3,1,2,0 + __m256i x2 = _mm256_packs_epi16(xi[2], xi[3]); + x2 = _mm256_permute4x64_epi64(x2, 0xd8); // 3,1,2,0 + + __m256i yy[M / 16]; + yy[0] = _zero; + yy[1] = _zero; + yy[2] = _zero; + + EVAL_YMM_0(x1) + EVAL_YMM_1(x1) + EVAL_YMM_0(x2) + REDUCE_(yy) + + __m256i xixj[38]; + generate_quadratic_terms( (unsigned char *) xixj, x ); + for (int i = 0 ; i < 36 ; i += 2) { + EVAL_YMM_0(xixj[i]) + EVAL_YMM_1(xixj[i]) + EVAL_YMM_0(xixj[i + 1]) + EVAL_YMM_1(xixj[i + 1]) + REDUCE_(yy) + } + EVAL_YMM_0(xixj[36]) { + __m128i tmp = _mm256_extracti128_si256(xixj[36], 1); + for (int i = 0; i < 4; i++) { + __m256i _xi = _mm256_broadcastw_epi16(tmp); + tmp = _mm_srli_si128(tmp, 2); + for (int j = 0; j < (N / 16); j++) { + __m256i coeff = _mm256_loadu_si256((__m256i const *) F); + F += 32; + yy[j] = _mm256_add_epi16(yy[j], _mm256_maddubs_epi16(_xi, coeff)); + } + } + } + REDUCE_(yy) + + yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); + yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), 
yy[1])), yy[1]); + yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); + + for (int i = 0; i < (N / 16); ++i) { + _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); + } +} + +/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of + N gf31 elements x (expected to be in reduced 5-bit representation). Expects + the coefficients in F to be in signed representation (i.e. [-15, 15], packed + bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */ +void PQCLEAN_MQDSS48_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F) { + __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); + __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); + __m256i _zero = _mm256_setzero_si256(); + + __m256i yy[(M / 16)]; + yy[0] = _zero; + yy[1] = _zero; + yy[2] = _zero; + + F += N * M; + + __m256i xixj[38]; + generate_xiyj_p_xjyi_terms( (unsigned char *) xixj, x, y ); + for (int i = 0 ; i < 36 ; i += 2) { + EVAL_YMM_0(xixj[i]) + EVAL_YMM_1(xixj[i]) + EVAL_YMM_0(xixj[i + 1]) + EVAL_YMM_1(xixj[i + 1]) + REDUCE_(yy) + } + EVAL_YMM_0(xixj[36]) { + __m128i tmp = _mm256_extracti128_si256(xixj[36], 1); + for (int i = 0; i < 4; i++) { + __m256i _xi = _mm256_broadcastw_epi16(tmp); + tmp = _mm_srli_si128(tmp, 2); + for (int j = 0; j < (N / 16); j++) { + __m256i coeff = _mm256_loadu_si256((__m256i const *) F); + F += 32; + yy[j] = _mm256_add_epi16(yy[j], _mm256_maddubs_epi16(_xi, coeff)); + } + } + } + REDUCE_(yy) + + yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); + yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); + yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); + + for (int i = 0; i < (N / 16); ++i) { + _mm256_storeu_si256((__m256i *)(fx + 
i * 16), yy[i]); + } +} diff --git a/crypto_sign/mqdss-48/avx2/mq.h b/crypto_sign/mqdss-48/avx2/mq.h new file mode 100644 index 00000000..4975740d --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/mq.h @@ -0,0 +1,18 @@ +#ifndef MQDSS_MQ_H +#define MQDSS_MQ_H + +#include "gf31.h" + +/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be + in reduced 5-bit representation). Expects the coefficients in F to be in + signed representation (i.e. [-15, 15], packed bytewise). + Outputs M gf31 elements in unique 16-bit representation as fx. */ +void PQCLEAN_MQDSS48_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F); + +/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of + N gf31 elements x (expected to be in reduced 5-bit representation). Expects + the coefficients in F to be in signed representation (i.e. [-15, 15], packed + bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */ +void PQCLEAN_MQDSS48_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F); + +#endif diff --git a/crypto_sign/mqdss-48/avx2/params.h b/crypto_sign/mqdss-48/avx2/params.h new file mode 100644 index 00000000..94e47077 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/params.h @@ -0,0 +1,25 @@ +#ifndef MQDSS_PARAMS_H +#define MQDSS_PARAMS_H + +#define N 48 +#define M N +#define F_LEN (M * (((N * (N + 1)) >> 1) + N)) /* Number of elements in F */ + +#define ROUNDS 184 + +/* Number of bytes that N, M and F_LEN elements require when packed into a byte + array, 5-bit elements packed continuously. 
*/ +/* Assumes N and M to be multiples of 8 */ +#define NPACKED_BYTES ((N * 5) >> 3) +#define MPACKED_BYTES ((M * 5) >> 3) +#define FPACKED_BYTES ((F_LEN * 5) >> 3) + +#define HASH_BYTES 32 +#define SEED_BYTES 16 +#define PK_BYTES (SEED_BYTES + MPACKED_BYTES) +#define SK_BYTES SEED_BYTES + +// R, sigma_0, ROUNDS * (t1, r{0,1}, e1, c, rho) +#define SIG_LEN (2 * HASH_BYTES + ROUNDS * (2*NPACKED_BYTES + MPACKED_BYTES + HASH_BYTES + HASH_BYTES)) + +#endif diff --git a/crypto_sign/mqdss-48/avx2/sign.c b/crypto_sign/mqdss-48/avx2/sign.c new file mode 100644 index 00000000..f454a254 --- /dev/null +++ b/crypto_sign/mqdss-48/avx2/sign.c @@ -0,0 +1,389 @@ +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "api.h" +#include "fips202.h" +#include "gf31.h" +#include "mq.h" +#include "params.h" +#include "randombytes.h" + +/* Takes an array of len bytes and computes a hash digest. + This is used as a hash function in the Fiat-Shamir transform. */ +static void H(unsigned char *out, const unsigned char *in, const size_t len) { + shake256(out, HASH_BYTES, in, len); +} + +/* Takes two arrays of N packed elements and an array of M packed elements, + and computes a HASH_BYTES commitment. */ +static void com_0(unsigned char *c, + const unsigned char *rho, + const unsigned char *inn, const unsigned char *inn2, + const unsigned char *inm) { + unsigned char buffer[HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES]; + memcpy(buffer, rho, HASH_BYTES); + memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); + memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inn2, NPACKED_BYTES); + memcpy(buffer + HASH_BYTES + 2 * NPACKED_BYTES, inm, MPACKED_BYTES); + shake256(c, HASH_BYTES, buffer, HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES); +} + +/* Takes an array of N packed elements and an array of M packed elements, + and computes a HASH_BYTES commitment.
*/ +static void com_1(unsigned char *c, + const unsigned char *rho, + const unsigned char *inn, const unsigned char *inm) { + unsigned char buffer[HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES]; + memcpy(buffer, rho, HASH_BYTES); + memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); + memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inm, MPACKED_BYTES); + shake256(c, HASH_BYTES, buffer, HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES); +} + +/* + * Generates an MQDSS key pair. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + signed char F[F_LEN]; + unsigned char skbuf[SEED_BYTES * 2]; + gf31 sk_gf31[N]; + gf31 pk_gf31[M]; + + // Expand sk to obtain a seed for F and the secret input s. + // We also expand to obtain a value for sampling r0, t0 and e0 during + // signature generation, but that is not relevant here. + randombytes(sk, SEED_BYTES); + shake256(skbuf, SEED_BYTES * 2, sk, SEED_BYTES); + + memcpy(pk, skbuf, SEED_BYTES); + PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); + PQCLEAN_MQDSS48_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); + PQCLEAN_MQDSS48_AVX2_MQ(pk_gf31, sk_gf31, F); + PQCLEAN_MQDSS48_AVX2_vgf31_unique(pk_gf31, pk_gf31); + PQCLEAN_MQDSS48_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + + signed char F[F_LEN]; + unsigned char skbuf[SEED_BYTES * 4]; + gf31 pk_gf31[M]; + unsigned char pk[SEED_BYTES + MPACKED_BYTES]; + // Concatenated for convenient hashing. 
+ unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; + unsigned char *D = D_sigma0_h0_sigma1; + unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; + unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; + unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; + unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; + shake256ctx shakestate; + unsigned char shakeblock[SHAKE256_RATE]; + unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; + unsigned char rnd_seed[HASH_BYTES + SEED_BYTES]; + unsigned char rho[2 * ROUNDS * HASH_BYTES]; + unsigned char *rho0 = rho; + unsigned char *rho1 = rho + ROUNDS * HASH_BYTES; + gf31 sk_gf31[N]; + gf31 rnd[(2 * N + M) * ROUNDS]; // Concatenated for easy RNG. + gf31 *r0 = rnd; + gf31 *t0 = rnd + N * ROUNDS; + gf31 *e0 = rnd + 2 * N * ROUNDS; + gf31 r1[N * ROUNDS]; + gf31 t1[N * ROUNDS]; + gf31 e1[M * ROUNDS]; + gf31 gx[M * ROUNDS]; + unsigned char packbuf0[NPACKED_BYTES]; + unsigned char packbuf1[NPACKED_BYTES]; + unsigned char packbuf2[MPACKED_BYTES]; + unsigned char c[HASH_BYTES * ROUNDS * 2]; + gf31 alpha; + int alpha_count = 0; + int b; + int i, j; + shake256incctx state; + + shake256(skbuf, SEED_BYTES * 4, sk, SEED_BYTES); + + PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, skbuf, SEED_BYTES); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk, SEED_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, HASH_BYTES, &state); // Compute R. 
+ shake256_inc_ctx_release(&state); + + memcpy(pk, skbuf, SEED_BYTES); + PQCLEAN_MQDSS48_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); + PQCLEAN_MQDSS48_AVX2_MQ(pk_gf31, sk_gf31, F); + PQCLEAN_MQDSS48_AVX2_vgf31_unique(pk_gf31, pk_gf31); + PQCLEAN_MQDSS48_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, pk, PK_BYTES); + shake256_inc_absorb(&state, sig, HASH_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(D, HASH_BYTES, &state); + shake256_inc_ctx_release(&state); + + sig += HASH_BYTES; // Compensate for prefixed R. + + memcpy(rnd_seed, skbuf + 2 * SEED_BYTES, SEED_BYTES); + memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); + shake256(rho, 2 * ROUNDS * HASH_BYTES, rnd_seed, SEED_BYTES + HASH_BYTES); + + memcpy(rnd_seed, skbuf + 3 * SEED_BYTES, SEED_BYTES); + memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); + PQCLEAN_MQDSS48_AVX2_gf31_nrand(rnd, (2 * N + M) * ROUNDS, rnd_seed, SEED_BYTES + HASH_BYTES); + + for (i = 0; i < ROUNDS; i++) { + for (j = 0; j < N; j++) { + r1[j + i * N] = (gf31)(31 + sk_gf31[j] - r0[j + i * N]); + } + PQCLEAN_MQDSS48_AVX2_G(gx + i * M, t0 + i * N, r1 + i * N, F); + } + for (i = 0; i < ROUNDS * M; i++) { + gx[i] = (gf31)(gx[i] + e0[i]); + } + for (i = 0; i < ROUNDS; i++) { + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, r0 + i * N, N); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, t0 + i * N, N); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf2, e0 + i * M, M); + com_0(c + HASH_BYTES * (2 * i + 0), rho0 + i * HASH_BYTES, packbuf0, packbuf1, packbuf2); + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(r1 + i * N, r1 + i * N); + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gx + i * M, gx + i * M); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, r1 + i * N, N); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, gx + i * M, M); + com_1(c + HASH_BYTES * (2 * i + 1), rho1 + i * HASH_BYTES, packbuf0, packbuf1); + } + + H(sigma0, c, HASH_BYTES * ROUNDS * 2); // 
Compute sigma_0. + shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); + shake256_squeezeblocks(shakeblock, 1, &shakestate); + + memcpy(h0, shakeblock, HASH_BYTES); + + memcpy(sig, sigma0, HASH_BYTES); + sig += HASH_BYTES; // Compensate for sigma_0. + + for (i = 0; i < ROUNDS; i++) { + do { + alpha = shakeblock[alpha_count] & 31; + alpha_count++; + if (alpha_count == SHAKE256_RATE) { + alpha_count = 0; + shake256_squeezeblocks(shakeblock, 1, &shakestate); + } + } while (alpha == 31); + for (j = 0; j < N; j++) { + t1[i * N + j] = (gf31)(alpha * r0[j + i * N] - t0[j + i * N] + 31); + } + PQCLEAN_MQDSS48_AVX2_MQ(e1 + i * M, r0 + i * N, F); + for (j = 0; j < N; j++) { + e1[i * N + j] = (gf31)(alpha * e1[j + i * M] - e0[j + i * M] + 31); + } + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(t1 + i * N, t1 + i * N); + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(e1 + i * N, e1 + i * N); + } + shake256_ctx_release(&shakestate); + + PQCLEAN_MQDSS48_AVX2_gf31_npack(t1packed, t1, N * ROUNDS); + PQCLEAN_MQDSS48_AVX2_gf31_npack(e1packed, e1, M * ROUNDS); + + memcpy(sig, t1packed, NPACKED_BYTES * ROUNDS); + sig += NPACKED_BYTES * ROUNDS; + memcpy(sig, e1packed, MPACKED_BYTES * ROUNDS); + sig += MPACKED_BYTES * ROUNDS; + + shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); + + for (i = 0; i < ROUNDS; i++) { + b = (h1[(i >> 3)] >> (i & 7)) & 1; + if (b == 0) { + PQCLEAN_MQDSS48_AVX2_gf31_npack(sig, r0 + i * N, N); + } else if (b == 1) { + PQCLEAN_MQDSS48_AVX2_gf31_npack(sig, r1 + i * N, N); + } + memcpy(sig + NPACKED_BYTES, c + HASH_BYTES * (2 * i + (1 - b)), HASH_BYTES); + memcpy(sig + NPACKED_BYTES + HASH_BYTES, rho + (i + b * ROUNDS) * HASH_BYTES, HASH_BYTES); + sig += NPACKED_BYTES + 2 * HASH_BYTES; + } + + *siglen = SIG_LEN; + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + + gf31 r[N]; + gf31 t[N]; + gf31 e[M]; + signed char F[F_LEN]; + gf31 pk_gf31[M]; + // Concatenated for convenient hashing. + unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; + unsigned char *D = D_sigma0_h0_sigma1; + unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; + unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; + unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; + unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; + unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; + unsigned char c[HASH_BYTES * ROUNDS * 2]; + memset(c, 0, HASH_BYTES * 2); + gf31 x[N]; + gf31 y[M]; + gf31 z[M]; + unsigned char packbuf0[NPACKED_BYTES]; + unsigned char packbuf1[MPACKED_BYTES]; + shake256ctx shakestate; + unsigned char shakeblock[SHAKE256_RATE]; + int i, j; + gf31 alpha; + int alpha_count = 0; + int b; + shake256incctx state; + + if (siglen != SIG_LEN) { + return -1; + } + + shake256_inc_init(&state); + shake256_inc_absorb(&state, pk, PK_BYTES); + shake256_inc_absorb(&state, sig, HASH_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(D, HASH_BYTES, &state); + shake256_inc_ctx_release(&state); + + sig += HASH_BYTES; + + PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_MQDSS48_AVX2_gf31_nunpack(pk_gf31, pk, M); + + memcpy(sigma0, sig, HASH_BYTES); + + shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); + shake256_squeezeblocks(shakeblock, 1, &shakestate); + + memcpy(h0, shakeblock, HASH_BYTES); + + sig += HASH_BYTES; + + memcpy(t1packed, sig, ROUNDS * NPACKED_BYTES); + sig += ROUNDS * NPACKED_BYTES; + memcpy(e1packed, sig, ROUNDS * MPACKED_BYTES); + sig += ROUNDS * MPACKED_BYTES; + + shake256(h1, ((ROUNDS + 7) & ~7) >> 3, 
D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); + + for (i = 0; i < ROUNDS; i++) { + do { + alpha = shakeblock[alpha_count] & 31; + alpha_count++; + if (alpha_count == SHAKE256_RATE) { + alpha_count = 0; + shake256_squeezeblocks(shakeblock, 1, &shakestate); + } + } while (alpha == 31); + b = (h1[(i >> 3)] >> (i & 7)) & 1; + + PQCLEAN_MQDSS48_AVX2_gf31_nunpack(r, sig, N); + PQCLEAN_MQDSS48_AVX2_gf31_nunpack(t, t1packed + NPACKED_BYTES * i, N); + PQCLEAN_MQDSS48_AVX2_gf31_nunpack(e, e1packed + MPACKED_BYTES * i, M); + + if (b == 0) { + PQCLEAN_MQDSS48_AVX2_MQ(y, r, F); + for (j = 0; j < N; j++) { + x[j] = (gf31)(alpha * r[j] - t[j] + 31); + } + for (j = 0; j < N; j++) { + y[j] = (gf31)(alpha * y[j] - e[j] + 31); + } + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(x, x); + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(y, y); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, x, N); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, y, M); + com_0(c + HASH_BYTES * (2 * i + 0), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0, packbuf1); + } else { + PQCLEAN_MQDSS48_AVX2_MQ(y, r, F); + PQCLEAN_MQDSS48_AVX2_G(z, t, r, F); + for (j = 0; j < N; j++) { + y[j] = (gf31)(alpha * (31 + pk_gf31[j] - y[j]) - z[j] - e[j] + 62); + } + PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(y, y); + PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, y, M); + com_1(c + HASH_BYTES * (2 * i + 1), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0); + } + memcpy(c + HASH_BYTES * (2 * i + (1 - b)), sig + NPACKED_BYTES, HASH_BYTES); + sig += NPACKED_BYTES + 2 * HASH_BYTES; + } + shake256_ctx_release(&shakestate); + + H(c, c, HASH_BYTES * ROUNDS * 2); + if (memcmp(c, sigma0, HASH_BYTES) != 0) { + return -1; + } + + return 0; +} + +/** + * Returns an array containing the signature followed by the message. 
+ */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_MQDSS48_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + SIG_LEN, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_MQDSS48_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but MQDSS signatures are always exactly SIG_LEN. */ + if (smlen < SIG_LEN) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - SIG_LEN; + + if (PQCLEAN_MQDSS48_AVX2_crypto_sign_verify( + sm, SIG_LEN, sm + SIG_LEN, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + SIG_LEN, *mlen); + + return 0; +} diff --git a/crypto_sign/mqdss-48/clean/sign.c b/crypto_sign/mqdss-48/clean/sign.c index fe4f2b75..57b11d66 100644 --- a/crypto_sign/mqdss-48/clean/sign.c +++ b/crypto_sign/mqdss-48/clean/sign.c @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/crypto_sign/mqdss-64/META.yml b/crypto_sign/mqdss-64/META.yml index 28feaa8c..be2a6a1f 100644 --- a/crypto_sign/mqdss-64/META.yml +++ b/crypto_sign/mqdss-64/META.yml @@ -16,3 +16,12 @@ auxiliary-submitters: implementations: - name: clean version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 + - name: avx2 + version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 + supported_platforms: + - architecture: x86_64 + required_flags: + - avx2 + - architecture: x86 + required_flags: + - avx2 diff --git a/crypto_sign/mqdss-64/avx2/LICENSE b/crypto_sign/mqdss-64/avx2/LICENSE new file mode 100644 index 00000000..670154e3 --- 
/dev/null +++ b/crypto_sign/mqdss-64/avx2/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. 
moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. 
+ +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. 
Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/crypto_sign/mqdss-64/avx2/Makefile b/crypto_sign/mqdss-64/avx2/Makefile new file mode 100644 index 00000000..940ebbd4 --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/Makefile @@ -0,0 +1,22 @@ +# This Makefile can be used with GNU Make or BSD Make + +LIB=libmqdss-64_avx2.a + +HEADERS = params.h gf31.h mq.h api.h +OBJECTS = gf31.o mq.o sign.o + +CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror \ + -Wmissing-prototypes -Wredundant-decls -std=c99 -mavx2 \ + -I../../../common $(EXTRAFLAGS) + +all: $(LIB) + +%.o: %.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJECTS) + $(AR) -r $@ $(OBJECTS) + +clean: + $(RM) $(OBJECTS) + $(RM) $(LIB) diff --git a/crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake b/crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake new file mode 100644 index 00000000..9a3e768c --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with
Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libmqdss-64_avx2.lib +OBJECTS=gf31.obj mq.obj sign.obj + +CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /arch:AVX2 + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. +$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_sign/mqdss-64/avx2/api.h b/crypto_sign/mqdss-64/avx2/api.h new file mode 100644 index 00000000..91326baa --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/api.h @@ -0,0 +1,47 @@ +#ifndef PQCLEAN_MQDSS64_AVX2_API_H +#define PQCLEAN_MQDSS64_AVX2_API_H + +#include <stddef.h> +#include <stdint.h> + +#define PQCLEAN_MQDSS64_AVX2_CRYPTO_ALGNAME "MQDSS-64" + +#define PQCLEAN_MQDSS64_AVX2_CRYPTO_SECRETKEYBYTES 24 +#define PQCLEAN_MQDSS64_AVX2_CRYPTO_PUBLICKEYBYTES 64 +#define PQCLEAN_MQDSS64_AVX2_CRYPTO_BYTES 59928 + +/* + * Generates an MQDSS key pair. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_keypair( + uint8_t *pk, uint8_t *sk); + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a detached signature and message under a given public key. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk); + +/** + * Returns an array containing the signature followed by the message. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk); + +/** + * Verifies a given signature-message pair under a given public key.
+ */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk); + +#endif diff --git a/crypto_sign/mqdss-64/avx2/gf31.c b/crypto_sign/mqdss-64/avx2/gf31.c new file mode 100644 index 00000000..5f65eb77 --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/gf31.c @@ -0,0 +1,128 @@ +#include "params.h" +#include "fips202.h" +#include "gf31.h" +#include +#include +#include +#include + +/* Given a vector of N elements in the range [0, 31], this reduces the elements + to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */ +void PQCLEAN_MQDSS64_AVX2_vgf31_unique(gf31 *out, gf31 *in) { + __m256i x; + __m256i _w31 = _mm256_set1_epi16(31); + int i; + + for (i = 0; i < (N >> 4); ++i) { + x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); + x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); + _mm256_storeu_si256((__m256i *)(out + i * 16), x); + } +} + +/* This function acts on vectors with 64 gf31 elements. +It performs one reduction step and guarantees output in [0, 30], +but requires input to be in [0, 32768). 
*/ +void PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in) { + __m256i x; + __m256i _w2114 = _mm256_set1_epi32(2114 * 65536 + 2114); + __m256i _w31 = _mm256_set1_epi16(31); + int i; + + for (i = 0; i < (N >> 4); ++i) { + x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); + x = _mm256_sub_epi16(x, _mm256_mullo_epi16(_w31, _mm256_mulhi_epi16(x, _w2114))); + x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); + _mm256_storeu_si256((__m256i *)(out + i * 16), x); + } +} + +/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places + them in a vector of 16-bit elements */ +void PQCLEAN_MQDSS64_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen) { + size_t i = 0, j; + shake256ctx shakestate; + uint8_t shakeblock[SHAKE256_RATE]; + + shake256_absorb(&shakestate, seed, seedlen); + + while (i < len) { + shake256_squeezeblocks(shakeblock, 1, &shakestate); + for (j = 0; j < SHAKE256_RATE && i < len; j++) { + if ((shakeblock[j] & 31) != 31) { + out[i] = (shakeblock[j] & 31); + i++; + } + } + } + shake256_ctx_release(&shakestate); +} + +/* Given a seed, samples len gf31 elements, transposed into signed range, + i.e. in the range [-15, 15], and places them in an array of 8-bit integers. + This is used for the expansion of F, which wants packed elements. */ +void PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen) { + size_t i = 0, j; + shake256ctx shakestate; + uint8_t shakeblock[SHAKE256_RATE]; + + shake256_absorb(&shakestate, seed, seedlen); + + while (i < len) { + shake256_squeezeblocks(shakeblock, 1, &shakestate); + for (j = 0; j < SHAKE256_RATE && i < len; j++) { + if ((shakeblock[j] & 31) != 31) { + out[i] = (signed char)((shakeblock[j] & 31) - 15); + i++; + } + } + } + shake256_ctx_release(&shakestate); + +} + +/* Unpacks an array of packed GF31 elements to one element per gf31.
+ Assumes that there is sufficient empty space available at the end of the + array to unpack. Can perform in-place. */ +void PQCLEAN_MQDSS64_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n) { + size_t i; + size_t j = ((n * 5) >> 3) - 1; + unsigned int d = 0; + + for (i = n; i > 0; i--) { + out[i - 1] = (gf31)((in[j] >> d) & 31); + d += 5; + if (d > 8) { + d -= 8; + j--; + out[i - 1] = (gf31)(out[i - 1] ^ ((in[j] << (5 - d)) & 31)); + } + } +} + +/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values. + Assumes that there is sufficient space available to pack. + Can perform in-place. */ +void PQCLEAN_MQDSS64_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n) { + unsigned int i = 0; + unsigned int j; + int d = 3; + + for (j = 0; j < n; j++) { + assert(in[j] < 31); + } + + /* There will be ceil(5n / 8) output blocks */ + memset(out, 0, (size_t)((5 * n + 7) & ~7U) >> 3); + + for (j = 0; j < n; j++) { + if (d < 0) { + d += 8; + out[i] = (uint8_t)((out[i] & (255 << (d - 3))) | + ((in[j] >> (8 - d)) & ~(255 << (d - 3)))); + i++; + } + out[i] = (uint8_t)((out[i] & ~(31 << d)) | ((in[j] << d) & (31 << d))); + d -= 5; + } +} diff --git a/crypto_sign/mqdss-64/avx2/gf31.h b/crypto_sign/mqdss-64/avx2/gf31.h new file mode 100644 index 00000000..556df9be --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/gf31.h @@ -0,0 +1,36 @@ +#ifndef MQDSS_GF31_H +#define MQDSS_GF31_H + +#include <stddef.h> +#include <stdint.h> + +typedef unsigned short gf31; + +/* Given a vector of elements in the range [0, 31], this reduces the elements + to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */ +void PQCLEAN_MQDSS64_AVX2_vgf31_unique(gf31 *out, gf31 *in); + +/* Given a vector of 16-bit integers (i.e.
in [0, 32767]), this reduces the + elements to the range [0, 30] by mapping 31 to 0 (i.e. reduction mod 31) */ +void PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in); + +/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places + them in a vector of 16-bit elements */ +void PQCLEAN_MQDSS64_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen); + +/* Given a seed, samples len gf31 elements, transposed into signed range, + i.e. in the range [-15, 15], and places them in an array of 8-bit integers. + This is used for the expansion of F, which wants packed elements. */ +void PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen); + +/* Unpacks an array of packed GF31 elements to one element per gf31. + Assumes that there is sufficient empty space available at the end of the + array to unpack. Can perform in-place. */ +void PQCLEAN_MQDSS64_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n); + +/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values. + Assumes that there is sufficient space available to pack. + Can perform in-place.
*/ +void PQCLEAN_MQDSS64_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n); + +#endif diff --git a/crypto_sign/mqdss-64/avx2/mq.c b/crypto_sign/mqdss-64/avx2/mq.c new file mode 100644 index 00000000..b44e85fe --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/mq.c @@ -0,0 +1,239 @@ +#include "mq.h" +#include "params.h" +#include +#include + +static inline __m256i reduce_16(__m256i r, __m256i _w31, __m256i _w2114) { + __m256i exp = _mm256_mulhi_epi16(r, _w2114); + return _mm256_sub_epi16(r, _mm256_mullo_epi16(_w31, exp)); +} + +/* Computes all products x_i * x_j, returns in reduced form */ +inline static +void generate_quadratic_terms( unsigned char *xij, const gf31 *x ) { + __m256i mask_2114 = _mm256_set1_epi16( 2114 ); + __m256i mask_31 = _mm256_set1_epi16( 31 ); + __m256i xi[4]; + xi[0] = _mm256_loadu_si256((__m256i const *) (x)); + xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); + xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); + xi[3] = _mm256_loadu_si256((__m256i const *) (x + 48)); + + __m256i xixj[4]; + xixj[0] = _mm256_setzero_si256(); + xixj[1] = _mm256_setzero_si256(); + xixj[2] = _mm256_setzero_si256(); + xixj[3] = _mm256_setzero_si256(); + + int k = 0; + for (int i = 0; i < 32; i++) { + __m256i br_xi = _mm256_set1_epi16( (short)x[i] ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); + r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r ); + k += i + 1; + } + + for (int i = 32; i < N; i++) { + __m256i br_xi = _mm256_set1_epi16( (short)x[i] ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); + r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r0 
); + __m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); + r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); + k += i + 1; + } +} + +/* Computes all terms (x_i * y_j) + (x_j * y_i), returns in reduced form */ +inline static +void generate_xiyj_p_xjyi_terms( unsigned char *xij, const gf31 *x, const gf31 *y ) { + __m256i mask_2114 = _mm256_set1_epi16( 2114 ); + __m256i mask_31 = _mm256_set1_epi16( 31 ); + __m256i xiyi[4]; + xiyi[0] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y)), 1 )); + xiyi[1] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 16)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 16)), 1 )); + xiyi[2] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 32)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 32)), 1 )); + xiyi[3] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 48)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 48)), 1 )); + + __m256i xixj[4]; + xixj[0] = _mm256_setzero_si256(); + xixj[1] = _mm256_setzero_si256(); + xixj[2] = _mm256_setzero_si256(); + xixj[3] = _mm256_setzero_si256(); + + int k = 0; + for (int i = 0; i < 32; i++) { + __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); + r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r ); + k += i + 1; + } + + for (int i = 32; i < N; i++) { + __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); + for (int j = 0; j <= (i >> 4); j++) { + xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); + xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); + } + + __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); + r0 = 
_mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); + __m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); + r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 + _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); + k += i + 1; + } +} + +#define EVAL_YMM_0(xx) {\ + __m128i tmp = _mm256_castsi256_si128(xx); \ + for (int macro_i = 0; macro_i < 8; macro_i++) { \ + __m256i _xi = _mm256_broadcastw_epi16(tmp); \ + tmp = _mm_srli_si128(tmp, 2); \ + for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ + __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ + F += 32; \ + yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ + } \ + } \ + } + +#define EVAL_YMM_1(xx) {\ + __m128i tmp = _mm256_extracti128_si256(xx, 1); \ + for (int macro_i = 0; macro_i < 8; macro_i++) { \ + __m256i _xi = _mm256_broadcastw_epi16(tmp); \ + tmp = _mm_srli_si128(tmp, 2); \ + for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ + __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ + F += 32; \ + yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ + } \ + } \ + } + +#define REDUCE_(yy) { \ + (yy)[0] = reduce_16((yy)[0], mask_reduce, mask_2114); \ + (yy)[1] = reduce_16((yy)[1], mask_reduce, mask_2114); \ + (yy)[2] = reduce_16((yy)[2], mask_reduce, mask_2114); \ + (yy)[3] = reduce_16((yy)[3], mask_reduce, mask_2114); \ + } + + +/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be + in reduced 5-bit representation). Expects the coefficients in F to be in + signed representation (i.e. [-15, 15], packed bytewise). + Outputs M gf31 elements in unique 16-bit representation as fx. 
*/ +void PQCLEAN_MQDSS64_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F) { + __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); + __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); + + __m256i xi[4]; + xi[0] = _mm256_loadu_si256((__m256i const *) (x)); + xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); + xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); + xi[3] = _mm256_loadu_si256((__m256i const *) (x + 48)); + + __m256i _zero = _mm256_setzero_si256(); + xi[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[0])), xi[0]); + xi[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[1])), xi[1]); + xi[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[2])), xi[2]); + xi[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[3])), xi[3]); + + __m256i x1 = _mm256_packs_epi16(xi[0], xi[1]); + x1 = _mm256_permute4x64_epi64(x1, 0xd8); // 3,1,2,0 + __m256i x2 = _mm256_packs_epi16(xi[2], xi[3]); + x2 = _mm256_permute4x64_epi64(x2, 0xd8); // 3,1,2,0 + + __m256i yy[M / 16]; + yy[0] = _zero; + yy[1] = _zero; + yy[2] = _zero; + yy[3] = _zero; + + EVAL_YMM_0(x1) + EVAL_YMM_1(x1) + EVAL_YMM_0(x2) + EVAL_YMM_1(x2) + REDUCE_(yy) + + __m256i xixj[65]; + generate_quadratic_terms( (unsigned char *) xixj, x ); + for (int i = 0 ; i < 64 ; i += 2) { + EVAL_YMM_0(xixj[i]) + EVAL_YMM_1(xixj[i]) + EVAL_YMM_0(xixj[i + 1]) + EVAL_YMM_1(xixj[i + 1]) + REDUCE_(yy) + } + EVAL_YMM_0(xixj[64]) + EVAL_YMM_1(xixj[64]) + REDUCE_(yy) + + yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); + yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); + yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); + yy[3] = 
_mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[3])), yy[3]); + + for (int i = 0; i < (N / 16); ++i) { + _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); + } +} + +/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of + N gf31 elements x (expected to be in reduced 5-bit representation). Expects + the coefficients in F to be in signed representation (i.e. [-15, 15], packed + bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */ +void PQCLEAN_MQDSS64_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F) { + __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); + __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); + __m256i _zero = _mm256_setzero_si256(); + + __m256i yy[(M / 16)]; + yy[0] = _zero; + yy[1] = _zero; + yy[2] = _zero; + yy[3] = _zero; + + F += N * M; + + __m256i xixj[65]; + generate_xiyj_p_xjyi_terms( (unsigned char *) xixj, x, y ); + for (int i = 0 ; i < 64 ; i += 2) { + EVAL_YMM_0(xixj[i]) + EVAL_YMM_1(xixj[i]) + EVAL_YMM_0(xixj[i + 1]) + EVAL_YMM_1(xixj[i + 1]) + REDUCE_(yy) + } + EVAL_YMM_0(xixj[64]) + EVAL_YMM_1(xixj[64]) + REDUCE_(yy) + + yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); + yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); + yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); + yy[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[3])), yy[3]); + + for (int i = 0; i < (N / 16); ++i) { + _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); + } +} diff --git a/crypto_sign/mqdss-64/avx2/mq.h b/crypto_sign/mqdss-64/avx2/mq.h new file mode 100644 index 00000000..179555a3 --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/mq.h @@ -0,0 +1,18 @@ +#ifndef 
MQDSS_MQ_H +#define MQDSS_MQ_H + +#include "gf31.h" + +/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be + in reduced 5-bit representation). Expects the coefficients in F to be in + signed representation (i.e. [-15, 15], packed bytewise). + Outputs M gf31 elements in unique 16-bit representation as fx. */ +void PQCLEAN_MQDSS64_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F); + +/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of + N gf31 elements x (expected to be in reduced 5-bit representation). Expects + the coefficients in F to be in signed representation (i.e. [-15, 15], packed + bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */ +void PQCLEAN_MQDSS64_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F); + +#endif diff --git a/crypto_sign/mqdss-64/avx2/params.h b/crypto_sign/mqdss-64/avx2/params.h new file mode 100644 index 00000000..d0278f01 --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/params.h @@ -0,0 +1,25 @@ +#ifndef MQDSS_PARAMS_H +#define MQDSS_PARAMS_H + +#define N 64 +#define M N +#define F_LEN (M * (((N * (N + 1)) >> 1) + N)) /* Number of elements in F */ + +#define ROUNDS 277 + +/* Number of bytes that N, M and F_LEN elements require when packed into a byte + array, 5-bit elements packed continuously. 
*/ +/* Assumes N and M to be multiples of 8 */ +#define NPACKED_BYTES ((N * 5) >> 3) +#define MPACKED_BYTES ((M * 5) >> 3) +#define FPACKED_BYTES ((F_LEN * 5) >> 3) + +#define HASH_BYTES 48 +#define SEED_BYTES 24 +#define PK_BYTES (SEED_BYTES + MPACKED_BYTES) +#define SK_BYTES SEED_BYTES + +// R, sigma_0, ROUNDS * (t1, r{0,1}, e1, c, rho) +#define SIG_LEN (2 * HASH_BYTES + ROUNDS * (2*NPACKED_BYTES + MPACKED_BYTES + HASH_BYTES + HASH_BYTES)) + +#endif diff --git a/crypto_sign/mqdss-64/avx2/sign.c b/crypto_sign/mqdss-64/avx2/sign.c new file mode 100644 index 00000000..47940a5b --- /dev/null +++ b/crypto_sign/mqdss-64/avx2/sign.c @@ -0,0 +1,389 @@ +#include +#include +#include + +#include "api.h" +#include "fips202.h" +#include "gf31.h" +#include "mq.h" +#include "params.h" +#include "randombytes.h" + +/* Takes an array of len bytes and computes a hash digest. + This is used as a hash function in the Fiat-Shamir transform. */ +static void H(unsigned char *out, const unsigned char *in, const size_t len) { + shake256(out, HASH_BYTES, in, len); +} + +/* Takes two arrays of N packed elements and an array of M packed elements, + and computes a HASH_BYTES commitment. */ +static void com_0(unsigned char *c, + const unsigned char *rho, + const unsigned char *inn, const unsigned char *inn2, + const unsigned char *inm) { + unsigned char buffer[HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES]; + memcpy(buffer, rho, HASH_BYTES); + memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); + memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inn2, NPACKED_BYTES); + memcpy(buffer + HASH_BYTES + 2 * NPACKED_BYTES, inm, MPACKED_BYTES); + shake256(c, HASH_BYTES, buffer, HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES); +} + +/* Takes an array of N packed elements and an array of M packed elements, + and computes a HASH_BYTES commitment. 
*/ +static void com_1(unsigned char *c, + const unsigned char *rho, + const unsigned char *inn, const unsigned char *inm) { + unsigned char buffer[HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES]; + memcpy(buffer, rho, HASH_BYTES); + memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); + memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inm, MPACKED_BYTES); + shake256(c, HASH_BYTES, buffer, HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES); +} + +/* + * Generates an MQDSS key pair. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { + signed char F[F_LEN]; + unsigned char skbuf[SEED_BYTES * 2]; + gf31 sk_gf31[N]; + gf31 pk_gf31[M]; + + // Expand sk to obtain a seed for F and the secret input s. + // We also expand to obtain a value for sampling r0, t0 and e0 during + // signature generation, but that is not relevant here. + randombytes(sk, SEED_BYTES); + shake256(skbuf, SEED_BYTES * 2, sk, SEED_BYTES); + + memcpy(pk, skbuf, SEED_BYTES); + PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); + PQCLEAN_MQDSS64_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); + PQCLEAN_MQDSS64_AVX2_MQ(pk_gf31, sk_gf31, F); + PQCLEAN_MQDSS64_AVX2_vgf31_unique(pk_gf31, pk_gf31); + PQCLEAN_MQDSS64_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); + + return 0; +} + +/** + * Returns an array containing a detached signature. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_signature( + uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + + signed char F[F_LEN]; + unsigned char skbuf[SEED_BYTES * 4]; + gf31 pk_gf31[M]; + unsigned char pk[SEED_BYTES + MPACKED_BYTES]; + // Concatenated for convenient hashing. 
+ unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; + unsigned char *D = D_sigma0_h0_sigma1; + unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; + unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; + unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; + unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; + shake256ctx shakestate; + unsigned char shakeblock[SHAKE256_RATE]; + unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; + unsigned char rnd_seed[HASH_BYTES + SEED_BYTES]; + unsigned char rho[2 * ROUNDS * HASH_BYTES]; + unsigned char *rho0 = rho; + unsigned char *rho1 = rho + ROUNDS * HASH_BYTES; + gf31 sk_gf31[N]; + gf31 rnd[(2 * N + M) * ROUNDS]; // Concatenated for easy RNG. + gf31 *r0 = rnd; + gf31 *t0 = rnd + N * ROUNDS; + gf31 *e0 = rnd + 2 * N * ROUNDS; + gf31 r1[N * ROUNDS]; + gf31 t1[N * ROUNDS]; + gf31 e1[M * ROUNDS]; + gf31 gx[M * ROUNDS]; + unsigned char packbuf0[NPACKED_BYTES]; + unsigned char packbuf1[NPACKED_BYTES]; + unsigned char packbuf2[MPACKED_BYTES]; + unsigned char c[HASH_BYTES * ROUNDS * 2]; + gf31 alpha; + int alpha_count = 0; + int b; + int i, j; + shake256incctx state; + + shake256(skbuf, SEED_BYTES * 4, sk, SEED_BYTES); + + PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, skbuf, SEED_BYTES); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, sk, SEED_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(sig, HASH_BYTES, &state); // Compute R. 
+ shake256_inc_ctx_release(&state); + + memcpy(pk, skbuf, SEED_BYTES); + PQCLEAN_MQDSS64_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); + PQCLEAN_MQDSS64_AVX2_MQ(pk_gf31, sk_gf31, F); + PQCLEAN_MQDSS64_AVX2_vgf31_unique(pk_gf31, pk_gf31); + PQCLEAN_MQDSS64_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); + + shake256_inc_init(&state); + shake256_inc_absorb(&state, pk, PK_BYTES); + shake256_inc_absorb(&state, sig, HASH_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(D, HASH_BYTES, &state); + shake256_inc_ctx_release(&state); + + sig += HASH_BYTES; // Compensate for prefixed R. + + memcpy(rnd_seed, skbuf + 2 * SEED_BYTES, SEED_BYTES); + memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); + shake256(rho, 2 * ROUNDS * HASH_BYTES, rnd_seed, SEED_BYTES + HASH_BYTES); + + memcpy(rnd_seed, skbuf + 3 * SEED_BYTES, SEED_BYTES); + memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); + PQCLEAN_MQDSS64_AVX2_gf31_nrand(rnd, (2 * N + M) * ROUNDS, rnd_seed, SEED_BYTES + HASH_BYTES); + + for (i = 0; i < ROUNDS; i++) { + for (j = 0; j < N; j++) { + r1[j + i * N] = (gf31)(31 + sk_gf31[j] - r0[j + i * N]); + } + PQCLEAN_MQDSS64_AVX2_G(gx + i * M, t0 + i * N, r1 + i * N, F); + } + for (i = 0; i < ROUNDS * M; i++) { + gx[i] = (gf31)(gx[i] + e0[i]); + } + for (i = 0; i < ROUNDS; i++) { + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, r0 + i * N, N); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, t0 + i * N, N); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf2, e0 + i * M, M); + com_0(c + HASH_BYTES * (2 * i + 0), rho0 + i * HASH_BYTES, packbuf0, packbuf1, packbuf2); + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(r1 + i * N, r1 + i * N); + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gx + i * M, gx + i * M); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, r1 + i * N, N); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, gx + i * M, M); + com_1(c + HASH_BYTES * (2 * i + 1), rho1 + i * HASH_BYTES, packbuf0, packbuf1); + } + + H(sigma0, c, HASH_BYTES * ROUNDS * 2); // 
Compute sigma_0. + shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); + shake256_squeezeblocks(shakeblock, 1, &shakestate); + + memcpy(h0, shakeblock, HASH_BYTES); + + memcpy(sig, sigma0, HASH_BYTES); + sig += HASH_BYTES; // Compensate for sigma_0. + + for (i = 0; i < ROUNDS; i++) { + do { + alpha = shakeblock[alpha_count] & 31; + alpha_count++; + if (alpha_count == SHAKE256_RATE) { + alpha_count = 0; + shake256_squeezeblocks(shakeblock, 1, &shakestate); + } + } while (alpha == 31); + for (j = 0; j < N; j++) { + t1[i * N + j] = (gf31)(alpha * r0[j + i * N] - t0[j + i * N] + 31); + } + PQCLEAN_MQDSS64_AVX2_MQ(e1 + i * M, r0 + i * N, F); + for (j = 0; j < N; j++) { + e1[i * N + j] = (gf31)(alpha * e1[j + i * M] - e0[j + i * M] + 31); + } + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(t1 + i * N, t1 + i * N); + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(e1 + i * N, e1 + i * N); + } + shake256_ctx_release(&shakestate); + + PQCLEAN_MQDSS64_AVX2_gf31_npack(t1packed, t1, N * ROUNDS); + PQCLEAN_MQDSS64_AVX2_gf31_npack(e1packed, e1, M * ROUNDS); + + memcpy(sig, t1packed, NPACKED_BYTES * ROUNDS); + sig += NPACKED_BYTES * ROUNDS; + memcpy(sig, e1packed, MPACKED_BYTES * ROUNDS); + sig += MPACKED_BYTES * ROUNDS; + + shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); + + for (i = 0; i < ROUNDS; i++) { + b = (h1[(i >> 3)] >> (i & 7)) & 1; + if (b == 0) { + PQCLEAN_MQDSS64_AVX2_gf31_npack(sig, r0 + i * N, N); + } else if (b == 1) { + PQCLEAN_MQDSS64_AVX2_gf31_npack(sig, r1 + i * N, N); + } + memcpy(sig + NPACKED_BYTES, c + HASH_BYTES * (2 * i + (1 - b)), HASH_BYTES); + memcpy(sig + NPACKED_BYTES + HASH_BYTES, rho + (i + b * ROUNDS) * HASH_BYTES, HASH_BYTES); + sig += NPACKED_BYTES + 2 * HASH_BYTES; + } + + *siglen = SIG_LEN; + return 0; +} + +/** + * Verifies a detached signature and message under a given public key. 
+ */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_verify( + const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, const uint8_t *pk) { + + gf31 r[N]; + gf31 t[N]; + gf31 e[M]; + signed char F[F_LEN]; + gf31 pk_gf31[M]; + // Concatenated for convenient hashing. + unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; + unsigned char *D = D_sigma0_h0_sigma1; + unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; + unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; + unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; + unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; + unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; + unsigned char c[HASH_BYTES * ROUNDS * 2]; + memset(c, 0, HASH_BYTES * 2); + gf31 x[N]; + gf31 y[M]; + gf31 z[M]; + unsigned char packbuf0[NPACKED_BYTES]; + unsigned char packbuf1[MPACKED_BYTES]; + shake256ctx shakestate; + unsigned char shakeblock[SHAKE256_RATE]; + int i, j; + gf31 alpha; + int alpha_count = 0; + int b; + shake256incctx state; + + if (siglen != SIG_LEN) { + return -1; + } + + shake256_inc_init(&state); + shake256_inc_absorb(&state, pk, PK_BYTES); + shake256_inc_absorb(&state, sig, HASH_BYTES); + shake256_inc_absorb(&state, m, mlen); + shake256_inc_finalize(&state); + shake256_inc_squeeze(D, HASH_BYTES, &state); + shake256_inc_ctx_release(&state); + + sig += HASH_BYTES; + + PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); + pk += SEED_BYTES; + PQCLEAN_MQDSS64_AVX2_gf31_nunpack(pk_gf31, pk, M); + + memcpy(sigma0, sig, HASH_BYTES); + + shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); + shake256_squeezeblocks(shakeblock, 1, &shakestate); + + memcpy(h0, shakeblock, HASH_BYTES); + + sig += HASH_BYTES; + + memcpy(t1packed, sig, ROUNDS * NPACKED_BYTES); + sig += ROUNDS * NPACKED_BYTES; + memcpy(e1packed, sig, ROUNDS * MPACKED_BYTES); + sig += ROUNDS * MPACKED_BYTES; + + shake256(h1, ((ROUNDS + 7) & ~7) >> 3, 
D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); + + for (i = 0; i < ROUNDS; i++) { + do { + alpha = shakeblock[alpha_count] & 31; + alpha_count++; + if (alpha_count == SHAKE256_RATE) { + alpha_count = 0; + shake256_squeezeblocks(shakeblock, 1, &shakestate); + } + } while (alpha == 31); + b = (h1[(i >> 3)] >> (i & 7)) & 1; + + PQCLEAN_MQDSS64_AVX2_gf31_nunpack(r, sig, N); + PQCLEAN_MQDSS64_AVX2_gf31_nunpack(t, t1packed + NPACKED_BYTES * i, N); + PQCLEAN_MQDSS64_AVX2_gf31_nunpack(e, e1packed + MPACKED_BYTES * i, M); + + if (b == 0) { + PQCLEAN_MQDSS64_AVX2_MQ(y, r, F); + for (j = 0; j < N; j++) { + x[j] = (gf31)(alpha * r[j] - t[j] + 31); + } + for (j = 0; j < N; j++) { + y[j] = (gf31)(alpha * y[j] - e[j] + 31); + } + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(x, x); + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(y, y); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, x, N); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, y, M); + com_0(c + HASH_BYTES * (2 * i + 0), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0, packbuf1); + } else { + PQCLEAN_MQDSS64_AVX2_MQ(y, r, F); + PQCLEAN_MQDSS64_AVX2_G(z, t, r, F); + for (j = 0; j < N; j++) { + y[j] = (gf31)(alpha * (31 + pk_gf31[j] - y[j]) - z[j] - e[j] + 62); + } + PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(y, y); + PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, y, M); + com_1(c + HASH_BYTES * (2 * i + 1), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0); + } + memcpy(c + HASH_BYTES * (2 * i + (1 - b)), sig + NPACKED_BYTES, HASH_BYTES); + sig += NPACKED_BYTES + 2 * HASH_BYTES; + } + shake256_ctx_release(&shakestate); + + H(c, c, HASH_BYTES * ROUNDS * 2); + if (memcmp(c, sigma0, HASH_BYTES) != 0) { + return -1; + } + + return 0; +} + +/** + * Returns an array containing the signature followed by the message. 
+ */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign( + uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, const uint8_t *sk) { + size_t siglen; + + PQCLEAN_MQDSS64_AVX2_crypto_sign_signature( + sm, &siglen, m, mlen, sk); + + memmove(sm + SIG_LEN, m, mlen); + *smlen = siglen + mlen; + + return 0; +} + +/** + * Verifies a given signature-message pair under a given public key. + */ +int PQCLEAN_MQDSS64_AVX2_crypto_sign_open( + uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, const uint8_t *pk) { + /* The API caller does not necessarily know what size a signature should be + but MQDSS signatures are always exactly SIG_LEN. */ + if (smlen < SIG_LEN) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + *mlen = smlen - SIG_LEN; + + if (PQCLEAN_MQDSS64_AVX2_crypto_sign_verify( + sm, SIG_LEN, sm + SIG_LEN, *mlen, pk)) { + memset(m, 0, smlen); + *mlen = 0; + return -1; + } + + /* If verification was successful, move the message to the right place. */ + memmove(m, sm + SIG_LEN, *mlen); + + return 0; +} diff --git a/crypto_sign/mqdss-64/clean/sign.c b/crypto_sign/mqdss-64/clean/sign.c index 702cbdf8..16bbe254 100644 --- a/crypto_sign/mqdss-64/clean/sign.c +++ b/crypto_sign/mqdss-64/clean/sign.c @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/test/duplicate_consistency/mqdss-48_clean.yml b/test/duplicate_consistency/mqdss-48_clean.yml new file mode 100644 index 00000000..a5d2e758 --- /dev/null +++ b/test/duplicate_consistency/mqdss-48_clean.yml @@ -0,0 +1,20 @@ +consistency_checks: +- source: + scheme: mqdss-48 + implementation: avx2 + files: + - api.h + - mq.h + - LICENSE + - mq.h + - sign.c + - params.h +- source: + scheme: mqdss-64 + implementation: clean + files: + - gf31.c + - gf31.h + - LICENSE + - mq.c + - mq.h diff --git a/test/duplicate_consistency/mqdss-64_clean.yml b/test/duplicate_consistency/mqdss-64_clean.yml index ff84f477..79021ca0 100644 --- a/test/duplicate_consistency/mqdss-64_clean.yml +++ 
b/test/duplicate_consistency/mqdss-64_clean.yml @@ -9,3 +9,14 @@ consistency_checks: - mq.c - mq.h - sign.c +- source: + scheme: mqdss-64 + implementation: avx2 + files: + - api.h + - mq.h + - LICENSE + - mq.h + - sign.c + - params.h + diff --git a/test/test_testvectors.py b/test/test_testvectors.py index a6bc0855..37e46133 100644 --- a/test/test_testvectors.py +++ b/test/test_testvectors.py @@ -40,6 +40,7 @@ def test_testvectors(implementation, impl_path, test_dir, init, destr): implementation.name, '.exe' if os.name == 'nt' else '' ))], + print_output=False, ).replace('\r', '') assert(implementation.scheme.metadata()['testvectors-sha256'].lower() == hashlib.sha256(out.encode('utf-8')).hexdigest().lower())