diff --git a/crypto_sign/mqdss-48/META.yml b/crypto_sign/mqdss-48/META.yml
index 9789d348..0fba019d 100644
--- a/crypto_sign/mqdss-48/META.yml
+++ b/crypto_sign/mqdss-48/META.yml
@@ -16,3 +16,12 @@ auxiliary-submitters:
implementations:
- name: clean
version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1
+ - name: avx2
+ version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1
+ supported_platforms:
+ - architecture: x86_64
+ required_flags:
+ - avx2
+ - architecture: x86
+ required_flags:
+ - avx2
diff --git a/crypto_sign/mqdss-48/avx2/LICENSE b/crypto_sign/mqdss-48/avx2/LICENSE
new file mode 100644
index 00000000..670154e3
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/LICENSE
@@ -0,0 +1,116 @@
+CC0 1.0 Universal
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display, communicate,
+ and translate a Work;
+
+ ii. moral rights retained by the original author(s) and/or performer(s);
+
+ iii. publicity and privacy rights pertaining to a person's image or likeness
+ depicted in a Work;
+
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+
+ v. rights protecting the extraction, dissemination, use and reuse of data in
+ a Work;
+
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+
+ vii. other similar, equivalent or corresponding rights throughout the world
+ based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+
+ b. Affirmer offers the Work as-is and makes no representations or warranties
+ of any kind concerning the Work, express, implied, statutory or otherwise,
+ including without limitation warranties of title, merchantability, fitness
+ for a particular purpose, non infringement, or the absence of latent or
+ other defects, accuracy, or the present or absence of errors, whether or not
+ discoverable, all to the greatest extent permissible under applicable law.
+
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
diff --git a/crypto_sign/mqdss-48/avx2/Makefile b/crypto_sign/mqdss-48/avx2/Makefile
new file mode 100644
index 00000000..af26c1f7
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/Makefile
@@ -0,0 +1,22 @@
+# This Makefile can be used with GNU Make or BSD Make
+
+LIB=libmqdss-48_avx2.a
+
+HEADERS = params.h gf31.h mq.h api.h
+OBJECTS = gf31.o mq.o sign.o
+
+CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror \
+ -Wmissing-prototypes -Wredundant-decls -std=c99 -mavx2 \
+ -I../../../common $(EXTRAFLAGS)
+
+all: $(LIB)
+
+%.o: %.c $(HEADERS)
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+$(LIB): $(OBJECTS)
+ $(AR) -r $@ $(OBJECTS)
+
+clean:
+ $(RM) $(OBJECTS)
+ $(RM) $(LIB)
diff --git a/crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake b/crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake
new file mode 100644
index 00000000..07d51db5
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/Makefile.Microsoft_nmake
@@ -0,0 +1,19 @@
+# This Makefile can be used with Microsoft Visual Studio's nmake using the command:
+# nmake /f Makefile.Microsoft_nmake
+
+LIBRARY=libmqdss-48_avx2.lib
+OBJECTS=gf31.obj mq.obj sign.obj
+
+CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /arch:AVX2
+
+all: $(LIBRARY)
+
+# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h
+
+$(LIBRARY): $(OBJECTS)
+ LIB.EXE /NOLOGO /WX /OUT:$@ $**
+
+clean:
+ -DEL $(OBJECTS)
+ -DEL $(LIBRARY)
diff --git a/crypto_sign/mqdss-48/avx2/api.h b/crypto_sign/mqdss-48/avx2/api.h
new file mode 100644
index 00000000..82109189
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/api.h
@@ -0,0 +1,47 @@
+#ifndef PQCLEAN_MQDSS48_AVX2_API_H
+#define PQCLEAN_MQDSS48_AVX2_API_H
+
+#include
+#include
+
+#define PQCLEAN_MQDSS48_AVX2_CRYPTO_ALGNAME "MQDSS-48"
+
+#define PQCLEAN_MQDSS48_AVX2_CRYPTO_SECRETKEYBYTES 16
+#define PQCLEAN_MQDSS48_AVX2_CRYPTO_PUBLICKEYBYTES 46
+#define PQCLEAN_MQDSS48_AVX2_CRYPTO_BYTES 28400
+
+/*
+ * Generates an MQDSS key pair.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_keypair(
+ uint8_t *pk, uint8_t *sk);
+
+/**
+ * Returns an array containing a detached signature.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_signature(
+ uint8_t *sig, size_t *siglen,
+ const uint8_t *m, size_t mlen, const uint8_t *sk);
+
+/**
+ * Verifies a detached signature and message under a given public key.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_verify(
+ const uint8_t *sig, size_t siglen,
+ const uint8_t *m, size_t mlen, const uint8_t *pk);
+
+/**
+ * Returns an array containing the signature followed by the message.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign(
+ uint8_t *sm, size_t *smlen,
+ const uint8_t *m, size_t mlen, const uint8_t *sk);
+
+/**
+ * Verifies a given signature-message pair under a given public key.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_open(
+ uint8_t *m, size_t *mlen,
+ const uint8_t *sm, size_t smlen, const uint8_t *pk);
+
+#endif
diff --git a/crypto_sign/mqdss-48/avx2/gf31.c b/crypto_sign/mqdss-48/avx2/gf31.c
new file mode 100644
index 00000000..1a456e54
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/gf31.c
@@ -0,0 +1,123 @@
+#include "params.h"
+#include "fips202.h"
+#include "gf31.h"
+#include
+#include
+#include
+
+/* Canonicalises a vector of N gf31 elements: any lane equal to 31 is replaced
+   by 0, all other lanes are copied as-is. Inputs are expected in [0, 31]. */
+void PQCLEAN_MQDSS48_AVX2_vgf31_unique(gf31 *out, gf31 *in) {
+    const __m256i all31 = _mm256_set1_epi16(31);
+    int off;
+
+    for (off = 0; off < 16 * (N >> 4); off += 16) {
+        __m256i v = _mm256_loadu_si256((__m256i const *)(in + off));
+        __m256i hit = _mm256_cmpeq_epi16(v, all31);
+        /* 31 ^ 31 == 0: XOR with the masked constant clears exactly the matching lanes. */
+        _mm256_storeu_si256((__m256i *)(out + off), _mm256_xor_si256(v, _mm256_and_si256(all31, hit)));
+    }
+}
+
+/* Reduces a vector of N gf31 elements. A single reduction step brings each
+   lane into [0, 31], after which 31 is mapped to 0, so the output is in
+   [0, 30]. Every input lane is required to be in [0, 32768). */
+void PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in) {
+    const __m256i k2114 = _mm256_set1_epi16(2114);
+    const __m256i k31 = _mm256_set1_epi16(31);
+    int off;
+
+    for (off = 0; off < 16 * (N >> 4); off += 16) {
+        __m256i v = _mm256_loadu_si256((__m256i const *)(in + off));
+        __m256i q = _mm256_mulhi_epi16(v, k2114);
+        v = _mm256_sub_epi16(v, _mm256_mullo_epi16(k31, q));
+        v = _mm256_xor_si256(v, _mm256_and_si256(k31, _mm256_cmpeq_epi16(v, k31)));
+        _mm256_storeu_si256((__m256i *)(out + off), v);
+    }
+}
+
+/* Expands seed (seedlen bytes) with SHAKE256 into len gf31 elements in the
+   range [0, 30], stored one per 16-bit entry of out. */
+void PQCLEAN_MQDSS48_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen) {
+    uint8_t block[SHAKE256_RATE];
+    shake256ctx state;
+    size_t filled = 0, pos;
+
+    shake256_absorb(&state, seed, seedlen);
+    while (filled < len) {
+        shake256_squeezeblocks(block, 1, &state);
+        for (pos = 0; pos < SHAKE256_RATE && filled < len; pos++) {
+            /* Rejection sampling: use the low five bits unless they equal 31. */
+            unsigned int cand = block[pos] & 31u;
+            if (cand != 31u) {
+                out[filled++] = (gf31)cand;
+            }
+        }
+    }
+    shake256_ctx_release(&state);
+}
+
+/* Expands seed (seedlen bytes) with SHAKE256 into len gf31 elements in signed
+   representation, i.e. in the range [-15, 15], stored one per byte of out.
+   This is used for the expansion of F, which wants packed elements. */
+void PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen) {
+    uint8_t block[SHAKE256_RATE];
+    shake256ctx state;
+    size_t filled = 0, pos;
+
+    shake256_absorb(&state, seed, seedlen);
+    while (filled < len) {
+        shake256_squeezeblocks(block, 1, &state);
+        for (pos = 0; pos < SHAKE256_RATE && filled < len; pos++) {
+            /* Rejection sampling: use the low five bits unless they equal 31. */
+            int cand = block[pos] & 31;
+            if (cand != 31) {
+                /* Shift [0, 30] down to the centred range [-15, 15]. */
+                out[filled++] = (signed char)(cand - 15);
+            }
+        }
+    }
+    shake256_ctx_release(&state);
+}
+
+/* Unpacks an array of n packed GF31 elements (5 bits each) to one element per gf31.
+ Assumes that there is sufficient empty space available at the end of the
+ array to unpack. Can perform in-place: elements are produced last to first. */
+void PQCLEAN_MQDSS48_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n) {
+ size_t i;
+ size_t j = ((n * 5) >> 3) - 1; /* last packed byte; NOTE(review): exact only when 5 * n is a multiple of 8 */
+ unsigned int d = 0; /* right-shift that aligns the current element inside in[j] */
+
+ for (i = n; i > 0; i--) { /* walk the elements from last to first */
+ out[i - 1] = (gf31)((in[j] >> d) & 31); /* bits of the element that live in in[j] (none when d == 8) */
+ d += 5;
+ if (d > 8) { /* the element continues in the preceding byte */
+ d -= 8;
+ j--;
+ out[i - 1] = (gf31)(out[i - 1] ^ ((in[j] << (5 - d)) & 31)); /* merge in the remaining high bits */
+ }
+ }
+}
+
+/* Packs n GF31 elements from gf31's into concatenated 5-bit values, first
+   element in the most significant bits. out must hold ceil(5n / 8) bytes and
+   must not overlap in: it is zeroed before any element of in is read. */
+void PQCLEAN_MQDSS48_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n) {
+    size_t i = 0, j; /* size_t indices, matching the type of the element count n */
+    int d = 3;       /* left shift that places the next element inside out[i] */
+
+    /* There will be ceil(5n / 8) output bytes; the length is computed in full size_t width. */
+    memset(out, 0, (5 * n + 7) >> 3);
+
+    for (j = 0; j < n; j++) {
+        if (d < 0) {
+            /* Element straddles two bytes: finish out[i] with its high bits. */
+            d += 8;
+            out[i] = (uint8_t)((out[i] & (255 << (d - 3))) |
+                               ((in[j] >> (8 - d)) & ~(255 << (d - 3))));
+            i++;
+        }
+        out[i] = (uint8_t)((out[i] & ~(31 << d)) | ((in[j] << d) & (31 << d)));
+        d -= 5;
+    }
+}
diff --git a/crypto_sign/mqdss-48/avx2/gf31.h b/crypto_sign/mqdss-48/avx2/gf31.h
new file mode 100644
index 00000000..91ef43b8
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/gf31.h
@@ -0,0 +1,36 @@
+#ifndef MQDSS_GF31_H
+#define MQDSS_GF31_H
+
+#include
+#include
+
+typedef unsigned short gf31;
+
+/* Given a vector of elements in the range [0, 31], this reduces the elements
+ to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */
+void PQCLEAN_MQDSS48_AVX2_vgf31_unique(gf31 *out, gf31 *in);
+
+/* Given a vector of N 16-bit integers in the range [0, 32768), this reduces
+ the elements to the range [0, 30] (i.e. reduction mod 31) */
+void PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in);
+
+/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places
+ them in a vector of 16-bit elements */
+void PQCLEAN_MQDSS48_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen);
+
+/* Given a seed, samples len gf31 elements, transposed into signed range,
+ i.e. in the range [-15, 15], and places them in an array of 8-bit integers.
+ This is used for the expansion of F, which wants packed elements. */
+void PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen);
+
+/* Unpacks an array of packed GF31 elements to one element per gf31.
+ Assumes that there is sufficient empty space available at the end of the
+ array to unpack. Can perform in-place. */
+void PQCLEAN_MQDSS48_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n);
+
+/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values.
+ Assumes that there is sufficient space available to pack. The output is
+ zeroed before packing starts, so out must not overlap in. */
+void PQCLEAN_MQDSS48_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n);
+
+#endif
diff --git a/crypto_sign/mqdss-48/avx2/mq.c b/crypto_sign/mqdss-48/avx2/mq.c
new file mode 100644
index 00000000..3eb9cd3c
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/mq.c
@@ -0,0 +1,251 @@
+#include "mq.h"
+#include "params.h"
+#include
+#include
+
+/* Per signed 16-bit lane: r - _w31 * mulhi(r, _w2114); with _w31 = 31 and _w2114 = 2114 this is one reduction step modulo 31. */
+static inline __m256i reduce_16(__m256i r, __m256i _w31, __m256i _w2114) {
+    return _mm256_sub_epi16(r, _mm256_mullo_epi16(_w31, _mm256_mulhi_epi16(r, _w2114)));
+}
+
+/* Computes all products x_i * x_j (0 <= j <= i < N), returns in reduced form: one byte per product, row i at byte offset i * (i + 1) / 2 of xij */
+inline static
+void generate_quadratic_terms( unsigned char *xij, const gf31 *x ) {
+ __m256i mask_2114 = _mm256_set1_epi16( 2114 );
+ __m256i mask_31 = _mm256_set1_epi16( 31 );
+ __m256i xi[4];
+ xi[0] = _mm256_loadu_si256((__m256i const *) (x));
+ xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16));
+ xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32));
+ xi[3] = _mm256_setzero_si256(); /* never multiplied below: the inner loops stop at j == 2 */
+ /* xixj[j] holds x_i times elements 16j..16j+15 of x; chunks not reached yet stay zero */
+ __m256i xixj[4];
+ xixj[0] = _mm256_setzero_si256();
+ xixj[1] = _mm256_setzero_si256();
+ xixj[2] = _mm256_setzero_si256();
+ xixj[3] = _mm256_setzero_si256();
+
+ int k = 0; /* byte offset of row i inside xij */
+ for (int i = 0; i < 32; i++) { /* rows with at most 32 products: one 32-byte store each */
+ __m256i br_xi = _mm256_set1_epi16( (short)x[i] ); /* x_i in every 16-bit lane */
+ for (int j = 0; j <= (i >> 4); j++) { /* only the 16-element chunks that contain an index <= i */
+ xixj[j] = _mm256_mullo_epi16( xi[j], br_xi );
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); /* 16-bit lanes -> bytes, interleaved per 128-bit half */
+ r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r );
+ k += i + 1; /* row i keeps i + 1 bytes; the next row's store overwrites the remainder */
+ }
+
+ for (int i = 32; i < N; i++) { /* longer rows: two 32-byte stores each */
+ __m256i br_xi = _mm256_set1_epi16( (short)x[i] );
+ for (int j = 0; j <= (i >> 4); j++) {
+ xixj[j] = _mm256_mullo_epi16( xi[j], br_xi );
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]);
+ r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r0 );
+ __m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); /* xixj[3] is still all-zero padding */
+ r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); /* full-width store: runs past the last product byte, so xij needs that slack */
+ k += i + 1;
+ }
+}
+
+/* Computes all terms (x_i * y_j) + (x_j * y_i) for 0 <= j <= i < N, returns in reduced form: one byte per term, row i at byte offset i * (i + 1) / 2 of xij */
+inline static
+void generate_xiyj_p_xjyi_terms( unsigned char *xij, const gf31 *x, const gf31 *y ) {
+ __m256i mask_2114 = _mm256_set1_epi16( 2114 );
+ __m256i mask_31 = _mm256_set1_epi16( 31 );
+ __m256i xiyi[4]; /* per 16-bit lane: x_j in the low byte, y_j moved to the high byte (assumes both fit in one byte) */
+ xiyi[0] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y)), 1 ));
+ xiyi[1] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 16)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 16)), 1 ));
+ xiyi[2] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 32)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 32)), 1 ));
+ xiyi[3] = _mm256_setzero_si256(); /* never multiplied below: the inner loops stop at j == 2 */
+ /* xixj[j] holds the terms of row i for elements 16j..16j+15; chunks not reached yet stay zero */
+ __m256i xixj[4];
+ xixj[0] = _mm256_setzero_si256();
+ xixj[1] = _mm256_setzero_si256();
+ xixj[2] = _mm256_setzero_si256();
+ xixj[3] = _mm256_setzero_si256();
+
+ int k = 0; /* byte offset of row i inside xij */
+ for (int i = 0; i < 32; i++) { /* rows with at most 32 terms: one 32-byte store each */
+ __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); /* every lane: y_i in the low byte, x_i in the high byte */
+ for (int j = 0; j <= (i >> 4); j++) {
+ xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); /* byte-wise multiply, adjacent pairs added: x_j * y_i + y_j * x_i */
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); /* 16-bit lanes -> bytes, interleaved per 128-bit half */
+ r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r );
+ k += i + 1; /* row i keeps i + 1 bytes; the next row's store overwrites the remainder */
+ }
+
+ for (int i = 32; i < N; i++) { /* longer rows: two 32-byte stores each */
+ __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) );
+ for (int j = 0; j <= (i >> 4); j++) {
+ xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi );
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]);
+ r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r0 );
+ __m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); /* xixj[3] is still all-zero padding */
+ r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); /* full-width store: runs past the last term byte, so xij needs that slack */
+ k += i + 1;
+ }
+}
+
+#define EVAL_YMM_0(xx) { /* lower 128 bits of xx; reads and advances F, accumulates into yy[] -- both taken from the expansion site */ \
+ __m128i tmp = _mm256_castsi256_si128(xx); \
+ for (int macro_i = 0; macro_i < 8; macro_i++) { /* eight 16-bit words = sixteen input bytes */ \
+ __m256i _xi = _mm256_broadcastw_epi16(tmp); /* lowest word (two packed bytes) in every lane */ \
+ tmp = _mm_srli_si128(tmp, 2); /* drop the two bytes just broadcast */ \
+ for (int macro_j = 0; macro_j < (N/16); macro_j++) { \
+ __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \
+ F += 32; /* next 32 coefficient bytes */ \
+ yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); /* _xi bytes unsigned, coeff bytes signed */ \
+ } \
+ } \
+ }
+
+#define EVAL_YMM_1(xx) { /* same as EVAL_YMM_0, but for the upper 128 bits of xx */ \
+ __m128i tmp = _mm256_extracti128_si256(xx, 1); \
+ for (int macro_i = 0; macro_i < 8; macro_i++) { \
+ __m256i _xi = _mm256_broadcastw_epi16(tmp); \
+ tmp = _mm_srli_si128(tmp, 2); \
+ for (int macro_j = 0; macro_j < (N/16); macro_j++) { \
+ __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \
+ F += 32; \
+ yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \
+ } \
+ } \
+ }
+
+#define REDUCE_(yy) { /* one reduce_16 step on the three accumulators; needs mask_reduce and mask_2114 at the expansion site */ \
+ (yy)[0] = reduce_16((yy)[0], mask_reduce, mask_2114); \
+ (yy)[1] = reduce_16((yy)[1], mask_reduce, mask_2114); \
+ (yy)[2] = reduce_16((yy)[2], mask_reduce, mask_2114); \
+ }
+
+/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be
+ in reduced 5-bit representation). Expects the coefficients in F to be in
+ signed representation (i.e. [-15, 15], packed bytewise); F is read front to back, linear terms first.
+ Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS48_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F) {
+ __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); /* 2114 in every 16-bit lane */
+ __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); /* all-ones >> 11 = 31 in every 16-bit lane */
+
+ __m256i xi[4];
+ xi[0] = _mm256_loadu_si256((__m256i const *) (x));
+ xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16));
+ xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32));
+ xi[3] = _mm256_setzero_si256(); /* zero padding for the second pack below */
+
+ __m256i _zero = _mm256_setzero_si256();
+ xi[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[0])), xi[0]); /* add 31 to lanes that are negative as signed 16-bit */
+ xi[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[1])), xi[1]);
+ xi[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[2])), xi[2]);
+
+ __m256i x1 = _mm256_packs_epi16(xi[0], xi[1]); /* x[0..31] as bytes, interleaved per 128-bit half */
+ x1 = _mm256_permute4x64_epi64(x1, 0xd8); // 3,1,2,0
+ __m256i x2 = _mm256_packs_epi16(xi[2], xi[3]); /* x[32..47] as bytes, followed by zero padding */
+ x2 = _mm256_permute4x64_epi64(x2, 0xd8); // 3,1,2,0
+
+ __m256i yy[M / 16]; /* 16-bit accumulators for the M outputs */
+ yy[0] = _zero;
+ yy[1] = _zero;
+ yy[2] = _zero;
+ /* linear terms: the 48 bytes of x (all of x1, lower half of x2), consuming N * M bytes of F */
+ EVAL_YMM_0(x1)
+ EVAL_YMM_1(x1)
+ EVAL_YMM_0(x2)
+ REDUCE_(yy)
+
+ __m256i xixj[38]; /* 38 * 32 = 1216 bytes: the N * (N + 1) / 2 = 1176 product bytes plus slack for full-width stores */
+ generate_quadratic_terms( (unsigned char *) xixj, x );
+ for (int i = 0 ; i < 36 ; i += 2) { /* 36 full vectors = 1152 products, reduced after every 64 */
+ EVAL_YMM_0(xixj[i])
+ EVAL_YMM_1(xixj[i])
+ EVAL_YMM_0(xixj[i + 1])
+ EVAL_YMM_1(xixj[i + 1])
+ REDUCE_(yy)
+ }
+ EVAL_YMM_0(xixj[36]) { /* remaining 24 products: 16 via the macro, then 8 (four words) in this block */
+ __m128i tmp = _mm256_extracti128_si256(xixj[36], 1);
+ for (int i = 0; i < 4; i++) {
+ __m256i _xi = _mm256_broadcastw_epi16(tmp);
+ tmp = _mm_srli_si128(tmp, 2);
+ for (int j = 0; j < (N / 16); j++) {
+ __m256i coeff = _mm256_loadu_si256((__m256i const *) F);
+ F += 32;
+ yy[j] = _mm256_add_epi16(yy[j], _mm256_maddubs_epi16(_xi, coeff));
+ }
+ }
+ }
+ REDUCE_(yy)
+ /* add 31 to lanes that are negative as signed 16-bit */
+ yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]);
+ yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]);
+ yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]);
+
+ for (int i = 0; i < (N / 16); ++i) { /* loop bound uses N although yy has M / 16 entries; M is defined as N in params.h */
+ _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]);
+ }
+}
+
+/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of
+ N gf31 elements x (expected to be in reduced 5-bit representation). Expects
+ the coefficients in F to be in signed representation (i.e. [-15, 15], packed
+ bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS48_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F) {
+ __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); /* 2114 in every 16-bit lane */
+ __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); /* all-ones >> 11 = 31 in every 16-bit lane */
+ __m256i _zero = _mm256_setzero_si256();
+
+ __m256i yy[(M / 16)]; /* 16-bit accumulators for the M outputs */
+ yy[0] = _zero;
+ yy[1] = _zero;
+ yy[2] = _zero;
+
+ F += N * M; /* skip the N * M coefficient bytes that MQ consumes for its linear terms */
+
+ __m256i xixj[38]; /* 38 * 32 = 1216 bytes: the N * (N + 1) / 2 = 1176 term bytes plus slack for full-width stores */
+ generate_xiyj_p_xjyi_terms( (unsigned char *) xixj, x, y );
+ for (int i = 0 ; i < 36 ; i += 2) { /* 36 full vectors = 1152 terms, reduced after every 64 */
+ EVAL_YMM_0(xixj[i])
+ EVAL_YMM_1(xixj[i])
+ EVAL_YMM_0(xixj[i + 1])
+ EVAL_YMM_1(xixj[i + 1])
+ REDUCE_(yy)
+ }
+ EVAL_YMM_0(xixj[36]) { /* remaining 24 terms: 16 via the macro, then 8 (four words) in this block */
+ __m128i tmp = _mm256_extracti128_si256(xixj[36], 1);
+ for (int i = 0; i < 4; i++) {
+ __m256i _xi = _mm256_broadcastw_epi16(tmp);
+ tmp = _mm_srli_si128(tmp, 2);
+ for (int j = 0; j < (N / 16); j++) {
+ __m256i coeff = _mm256_loadu_si256((__m256i const *) F);
+ F += 32;
+ yy[j] = _mm256_add_epi16(yy[j], _mm256_maddubs_epi16(_xi, coeff));
+ }
+ }
+ }
+ REDUCE_(yy)
+ /* add 31 to lanes that are negative as signed 16-bit */
+ yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]);
+ yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]);
+ yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]);
+
+ for (int i = 0; i < (N / 16); ++i) { /* loop bound uses N although yy has M / 16 entries; M is defined as N in params.h */
+ _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]);
+ }
+}
diff --git a/crypto_sign/mqdss-48/avx2/mq.h b/crypto_sign/mqdss-48/avx2/mq.h
new file mode 100644
index 00000000..4975740d
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/mq.h
@@ -0,0 +1,18 @@
+#ifndef MQDSS_MQ_H
+#define MQDSS_MQ_H
+
+#include "gf31.h"
+
+/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be
+ in reduced 5-bit representation). Expects the coefficients in F to be in
+ signed representation (i.e. [-15, 15], packed bytewise).
+ Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS48_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F);
+
+/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of
+ N gf31 elements x (expected to be in reduced 5-bit representation). Expects
+ the coefficients in F to be in signed representation (i.e. [-15, 15], packed
+ bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS48_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F);
+
+#endif
diff --git a/crypto_sign/mqdss-48/avx2/params.h b/crypto_sign/mqdss-48/avx2/params.h
new file mode 100644
index 00000000..94e47077
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/params.h
@@ -0,0 +1,25 @@
+#ifndef MQDSS_PARAMS_H
+#define MQDSS_PARAMS_H
+
+#define N 48
+#define M N
+#define F_LEN (M * (((N * (N + 1)) >> 1) + N)) /* Number of elements in F */
+
+#define ROUNDS 184
+
+/* Number of bytes that N, M and F_LEN elements require when packed into a byte
+ array, 5-bit elements packed continuously. */
+/* Assumes N and M to be multiples of 8 */
+#define NPACKED_BYTES ((N * 5) >> 3)
+#define MPACKED_BYTES ((M * 5) >> 3)
+#define FPACKED_BYTES ((F_LEN * 5) >> 3)
+
+#define HASH_BYTES 32
+#define SEED_BYTES 16
+#define PK_BYTES (SEED_BYTES + MPACKED_BYTES)
+#define SK_BYTES SEED_BYTES
+
+// R, sigma_0, ROUNDS * (t1, r{0,1}, e1, c, rho)
+#define SIG_LEN (2 * HASH_BYTES + ROUNDS * (2*NPACKED_BYTES + MPACKED_BYTES + HASH_BYTES + HASH_BYTES))
+
+#endif
diff --git a/crypto_sign/mqdss-48/avx2/sign.c b/crypto_sign/mqdss-48/avx2/sign.c
new file mode 100644
index 00000000..f454a254
--- /dev/null
+++ b/crypto_sign/mqdss-48/avx2/sign.c
@@ -0,0 +1,389 @@
+#include
+#include
+#include
+
+#include "api.h"
+#include "fips202.h"
+#include "gf31.h"
+#include "mq.h"
+#include "params.h"
+#include "randombytes.h"
+
+/* Takes an array of len bytes and computes a hash digest.
+ This is used as a hash function in the Fiat-Shamir transform.
+ out receives HASH_BYTES bytes of SHAKE-256 output over in[0..len). */
+static void H(unsigned char *out, const unsigned char *in, const size_t len) {
+ shake256(out, HASH_BYTES, in, len);
+}
+
+/* Commitment com_0: writes to c the HASH_BYTES SHAKE-256 digest of
+   rho || inn || inn2 || inm, where rho is HASH_BYTES long, inn and inn2 are
+   NPACKED_BYTES long each, and inm is MPACKED_BYTES long. */
+static void com_0(unsigned char *c,
+                  const unsigned char *rho,
+                  const unsigned char *inn, const unsigned char *inn2,
+                  const unsigned char *inm) {
+    unsigned char preimage[HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES];
+    unsigned char *cursor = preimage;
+
+    /* Lay the four inputs out back to back. */
+    memcpy(cursor, rho, HASH_BYTES);
+    cursor += HASH_BYTES;
+    memcpy(cursor, inn, NPACKED_BYTES);
+    cursor += NPACKED_BYTES;
+    memcpy(cursor, inn2, NPACKED_BYTES);
+    cursor += NPACKED_BYTES;
+    memcpy(cursor, inm, MPACKED_BYTES);
+
+    shake256(c, HASH_BYTES, preimage, sizeof(preimage));
+}
+
+/* Commitment com_1: writes to c the HASH_BYTES SHAKE-256 digest of
+   rho || inn || inm, where rho is HASH_BYTES long, inn is NPACKED_BYTES long
+   and inm is MPACKED_BYTES long. */
+static void com_1(unsigned char *c,
+                  const unsigned char *rho,
+                  const unsigned char *inn, const unsigned char *inm) {
+    unsigned char preimage[HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES];
+    unsigned char *cursor = preimage;
+
+    /* Lay the three inputs out back to back. */
+    memcpy(cursor, rho, HASH_BYTES);
+    cursor += HASH_BYTES;
+    memcpy(cursor, inn, NPACKED_BYTES);
+    cursor += NPACKED_BYTES;
+    memcpy(cursor, inm, MPACKED_BYTES);
+
+    shake256(c, HASH_BYTES, preimage, sizeof(preimage));
+}
+
+/*
+ * Generates an MQDSS key pair.
+ *
+ * sk receives SEED_BYTES random bytes. pk receives PK_BYTES bytes: the seed
+ * from which F is expanded, followed by the packed value of MQ(s), where s is
+ * the secret vector expanded from sk. Always returns 0.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) {
+    unsigned char expanded[2 * SEED_BYTES];
+    /* Second half of the expansion seeds the secret input s. */
+    const unsigned char *seed_s = expanded + SEED_BYTES;
+    signed char F[F_LEN];
+    gf31 s[N];
+    gf31 v[M];
+
+    /* The secret key is only a seed. Signing expands it further to sample
+       r0, t0 and e0; that part is not needed for key generation. */
+    randombytes(sk, SEED_BYTES);
+    shake256(expanded, sizeof(expanded), sk, SEED_BYTES);
+
+    /* First half of the expansion is the public seed for F. */
+    memcpy(pk, expanded, SEED_BYTES);
+    PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES);
+
+    /* v = MQ(s), brought to unique representation and packed after the seed. */
+    PQCLEAN_MQDSS48_AVX2_gf31_nrand(s, N, seed_s, SEED_BYTES);
+    PQCLEAN_MQDSS48_AVX2_MQ(v, s, F);
+    PQCLEAN_MQDSS48_AVX2_vgf31_unique(v, v);
+    PQCLEAN_MQDSS48_AVX2_gf31_npack(pk + SEED_BYTES, v, M);
+
+    return 0;
+}
+
+/**
+ * Returns an array containing a detached signature.
+ *
+ * Writes exactly SIG_LEN bytes to sig, laid out as
+ *   R || sigma_0 || t1 (all rounds) || e1 (all rounds) ||
+ *   ROUNDS * (r_b || c_{1-b} || rho_b)
+ * and stores SIG_LEN in *siglen. sk is the SEED_BYTES secret seed and m the
+ * mlen-byte message. Always returns 0.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_signature(
+    uint8_t *sig, size_t *siglen,
+    const uint8_t *m, size_t mlen, const uint8_t *sk) {
+
+    signed char F[F_LEN];
+    /* Four seeds expanded from sk: F, the secret s, rho, and (r0, t0, e0). */
+    unsigned char skbuf[SEED_BYTES * 4];
+    gf31 pk_gf31[M];
+    unsigned char pk[SEED_BYTES + MPACKED_BYTES];
+    // Concatenated for convenient hashing.
+    unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)];
+    unsigned char *D = D_sigma0_h0_sigma1;
+    unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES;
+    unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES;
+    unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES;
+    unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES;
+    shake256ctx shakestate;
+    unsigned char shakeblock[SHAKE256_RATE];
+    unsigned char h1[((ROUNDS + 7) & ~7) >> 3];
+    unsigned char rnd_seed[HASH_BYTES + SEED_BYTES];
+    unsigned char rho[2 * ROUNDS * HASH_BYTES];
+    unsigned char *rho0 = rho;
+    unsigned char *rho1 = rho + ROUNDS * HASH_BYTES;
+    gf31 sk_gf31[N];
+    gf31 rnd[(2 * N + M) * ROUNDS];  // Concatenated for easy RNG.
+    gf31 *r0 = rnd;
+    gf31 *t0 = rnd + N * ROUNDS;
+    gf31 *e0 = rnd + 2 * N * ROUNDS;
+    gf31 r1[N * ROUNDS];
+    gf31 t1[N * ROUNDS];
+    gf31 e1[M * ROUNDS];
+    gf31 gx[M * ROUNDS];
+    unsigned char packbuf0[NPACKED_BYTES];
+    unsigned char packbuf1[NPACKED_BYTES];
+    unsigned char packbuf2[MPACKED_BYTES];
+    unsigned char c[HASH_BYTES * ROUNDS * 2];
+    gf31 alpha;
+    int alpha_count = 0;
+    int b;
+    int i, j;
+    shake256incctx state;
+
+    shake256(skbuf, SEED_BYTES * 4, sk, SEED_BYTES);
+
+    PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, skbuf, SEED_BYTES);
+
+    /* R = SHAKE256(sk || m), written to the front of the signature. */
+    shake256_inc_init(&state);
+    shake256_inc_absorb(&state, sk, SEED_BYTES);
+    shake256_inc_absorb(&state, m, mlen);
+    shake256_inc_finalize(&state);
+    shake256_inc_squeeze(sig, HASH_BYTES, &state);  // Compute R.
+    shake256_inc_ctx_release(&state);
+
+    /* Recompute the public key from the secret seed, as in key generation. */
+    memcpy(pk, skbuf, SEED_BYTES);
+    PQCLEAN_MQDSS48_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES);
+    PQCLEAN_MQDSS48_AVX2_MQ(pk_gf31, sk_gf31, F);
+    PQCLEAN_MQDSS48_AVX2_vgf31_unique(pk_gf31, pk_gf31);
+    PQCLEAN_MQDSS48_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M);
+
+    /* D = SHAKE256(pk || R || m). */
+    shake256_inc_init(&state);
+    shake256_inc_absorb(&state, pk, PK_BYTES);
+    shake256_inc_absorb(&state, sig, HASH_BYTES);
+    shake256_inc_absorb(&state, m, mlen);
+    shake256_inc_finalize(&state);
+    shake256_inc_squeeze(D, HASH_BYTES, &state);
+    shake256_inc_ctx_release(&state);
+
+    sig += HASH_BYTES;  // Compensate for prefixed R.
+
+    /* rho0, rho1 for all rounds, derived from the third seed and D. */
+    memcpy(rnd_seed, skbuf + 2 * SEED_BYTES, SEED_BYTES);
+    memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES);
+    shake256(rho, 2 * ROUNDS * HASH_BYTES, rnd_seed, SEED_BYTES + HASH_BYTES);
+
+    /* r0, t0, e0 for all rounds, derived from the fourth seed and D. */
+    memcpy(rnd_seed, skbuf + 3 * SEED_BYTES, SEED_BYTES);
+    memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES);
+    PQCLEAN_MQDSS48_AVX2_gf31_nrand(rnd, (2 * N + M) * ROUNDS, rnd_seed, SEED_BYTES + HASH_BYTES);
+
+    for (i = 0; i < ROUNDS; i++) {
+        /* r1 = s - r0; the added 31 keeps the unsigned value non-negative. */
+        for (j = 0; j < N; j++) {
+            r1[j + i * N] = (gf31)(31 + sk_gf31[j] - r0[j + i * N]);
+        }
+        PQCLEAN_MQDSS48_AVX2_G(gx + i * M, t0 + i * N, r1 + i * N, F);
+    }
+    /* gx = G(t0, r1) + e0 */
+    for (i = 0; i < ROUNDS * M; i++) {
+        gx[i] = (gf31)(gx[i] + e0[i]);
+    }
+    /* Per round: c[2i] = com_0(rho0, r0, t0, e0), c[2i+1] = com_1(rho1, r1, gx). */
+    for (i = 0; i < ROUNDS; i++) {
+        PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, r0 + i * N, N);
+        PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, t0 + i * N, N);
+        PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf2, e0 + i * M, M);
+        com_0(c + HASH_BYTES * (2 * i + 0), rho0 + i * HASH_BYTES, packbuf0, packbuf1, packbuf2);
+        PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(r1 + i * N, r1 + i * N);
+        PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gx + i * M, gx + i * M);
+        PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, r1 + i * N, N);
+        PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, gx + i * M, M);
+        com_1(c + HASH_BYTES * (2 * i + 1), rho1 + i * HASH_BYTES, packbuf0, packbuf1);
+    }
+
+    H(sigma0, c, HASH_BYTES * ROUNDS * 2);  // Compute sigma_0.
+    /* Absorb D || sigma_0; the squeezed stream supplies h0 and the alphas. */
+    shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES);
+    shake256_squeezeblocks(shakeblock, 1, &shakestate);
+
+    memcpy(h0, shakeblock, HASH_BYTES);
+
+    memcpy(sig, sigma0, HASH_BYTES);
+    sig += HASH_BYTES;  // Compensate for sigma_0.
+
+    for (i = 0; i < ROUNDS; i++) {
+        /* Rejection-sample alpha in [0, 30] from the low 5 bits of each byte. */
+        do {
+            alpha = shakeblock[alpha_count] & 31;
+            alpha_count++;
+            if (alpha_count == SHAKE256_RATE) {
+                alpha_count = 0;
+                shake256_squeezeblocks(shakeblock, 1, &shakestate);
+            }
+        } while (alpha == 31);
+        /* t1 = alpha * r0 - t0 */
+        for (j = 0; j < N; j++) {
+            t1[i * N + j] = (gf31)(alpha * r0[j + i * N] - t0[j + i * N] + 31);
+        }
+        /* e1 = alpha * MQ(r0) - e0. e1 and e0 hold M elements per round, so
+           bounds and strides here use M (the original used N, which is only
+           correct because M == N). */
+        PQCLEAN_MQDSS48_AVX2_MQ(e1 + i * M, r0 + i * N, F);
+        for (j = 0; j < M; j++) {
+            e1[i * M + j] = (gf31)(alpha * e1[j + i * M] - e0[j + i * M] + 31);
+        }
+        PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(t1 + i * N, t1 + i * N);
+        PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(e1 + i * M, e1 + i * M);
+    }
+    shake256_ctx_release(&shakestate);
+
+    PQCLEAN_MQDSS48_AVX2_gf31_npack(t1packed, t1, N * ROUNDS);
+    PQCLEAN_MQDSS48_AVX2_gf31_npack(e1packed, e1, M * ROUNDS);
+
+    memcpy(sig, t1packed, NPACKED_BYTES * ROUNDS);
+    sig += NPACKED_BYTES * ROUNDS;
+    memcpy(sig, e1packed, MPACKED_BYTES * ROUNDS);
+    sig += MPACKED_BYTES * ROUNDS;
+
+    /* h1 = SHAKE256(D || sigma_0 || h0 || t1 || e1); bit i is challenge b. */
+    shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES));
+
+    for (i = 0; i < ROUNDS; i++) {
+        b = (h1[(i >> 3)] >> (i & 7)) & 1;
+        /* b is a single bit, so a plain else covers the b == 1 case. */
+        if (b == 0) {
+            PQCLEAN_MQDSS48_AVX2_gf31_npack(sig, r0 + i * N, N);
+        } else {
+            PQCLEAN_MQDSS48_AVX2_gf31_npack(sig, r1 + i * N, N);
+        }
+        /* Reveal the commitment that is NOT reopened, and the rho that is. */
+        memcpy(sig + NPACKED_BYTES, c + HASH_BYTES * (2 * i + (1 - b)), HASH_BYTES);
+        memcpy(sig + NPACKED_BYTES + HASH_BYTES, rho + (i + b * ROUNDS) * HASH_BYTES, HASH_BYTES);
+        sig += NPACKED_BYTES + 2 * HASH_BYTES;
+    }
+
+    *siglen = SIG_LEN;
+    return 0;
+}
+
+/**
+ * Verifies a detached signature and message under a given public key.
+ *
+ * sig must be exactly SIG_LEN bytes (siglen is checked), m is the mlen-byte
+ * message and pk the PK_BYTES public key. Returns 0 when the recomputed
+ * sigma_0 matches the one in the signature, and -1 otherwise (including a
+ * wrong siglen).
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_verify(
+    const uint8_t *sig, size_t siglen,
+    const uint8_t *m, size_t mlen, const uint8_t *pk) {
+
+    gf31 r[N];
+    gf31 t[N];
+    gf31 e[M];
+    signed char F[F_LEN];
+    gf31 pk_gf31[M];
+    // Concatenated for convenient hashing.
+    unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)];
+    unsigned char *D = D_sigma0_h0_sigma1;
+    unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES;
+    unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES;
+    unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES;
+    unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES;
+    unsigned char h1[((ROUNDS + 7) & ~7) >> 3];
+    unsigned char c[HASH_BYTES * ROUNDS * 2];
+    gf31 x[N];
+    gf31 y[M];
+    gf31 z[M];
+    unsigned char packbuf0[NPACKED_BYTES];
+    unsigned char packbuf1[MPACKED_BYTES];
+    shake256ctx shakestate;
+    unsigned char shakeblock[SHAKE256_RATE];
+    int i, j;
+    gf31 alpha;
+    int alpha_count = 0;
+    int b;
+    shake256incctx state;
+
+    /* Kept from the original: pre-clears the first two commitment slots.
+       Every slot is overwritten in the round loop below. */
+    memset(c, 0, HASH_BYTES * 2);
+
+    if (siglen != SIG_LEN) {
+        return -1;
+    }
+
+    /* D = SHAKE256(pk || R || m), with R the first HASH_BYTES of sig. */
+    shake256_inc_init(&state);
+    shake256_inc_absorb(&state, pk, PK_BYTES);
+    shake256_inc_absorb(&state, sig, HASH_BYTES);
+    shake256_inc_absorb(&state, m, mlen);
+    shake256_inc_finalize(&state);
+    shake256_inc_squeeze(D, HASH_BYTES, &state);
+    shake256_inc_ctx_release(&state);
+
+    sig += HASH_BYTES;
+
+    /* Expand F from the public seed and unpack v = MQ(s) from pk. */
+    PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES);
+    pk += SEED_BYTES;
+    PQCLEAN_MQDSS48_AVX2_gf31_nunpack(pk_gf31, pk, M);
+
+    memcpy(sigma0, sig, HASH_BYTES);
+
+    /* Absorb D || sigma_0; the squeezed stream supplies h0 and the alphas. */
+    shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES);
+    shake256_squeezeblocks(shakeblock, 1, &shakestate);
+
+    memcpy(h0, shakeblock, HASH_BYTES);
+
+    sig += HASH_BYTES;
+
+    memcpy(t1packed, sig, ROUNDS * NPACKED_BYTES);
+    sig += ROUNDS * NPACKED_BYTES;
+    memcpy(e1packed, sig, ROUNDS * MPACKED_BYTES);
+    sig += ROUNDS * MPACKED_BYTES;
+
+    /* h1 = SHAKE256(D || sigma_0 || h0 || t1 || e1); bit i is challenge b. */
+    shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES));
+
+    for (i = 0; i < ROUNDS; i++) {
+        /* Rejection-sample alpha in [0, 30] from the low 5 bits of each byte. */
+        do {
+            alpha = shakeblock[alpha_count] & 31;
+            alpha_count++;
+            if (alpha_count == SHAKE256_RATE) {
+                alpha_count = 0;
+                shake256_squeezeblocks(shakeblock, 1, &shakestate);
+            }
+        } while (alpha == 31);
+        b = (h1[(i >> 3)] >> (i & 7)) & 1;
+
+        /* Per-round response layout in sig: r_b || c_{1-b} || rho_b. */
+        PQCLEAN_MQDSS48_AVX2_gf31_nunpack(r, sig, N);
+        PQCLEAN_MQDSS48_AVX2_gf31_nunpack(t, t1packed + NPACKED_BYTES * i, N);
+        PQCLEAN_MQDSS48_AVX2_gf31_nunpack(e, e1packed + MPACKED_BYTES * i, M);
+
+        if (b == 0) {
+            /* Rebuild c[2i] = com_0(rho, r, alpha * r - t1, alpha * MQ(r) - e1). */
+            PQCLEAN_MQDSS48_AVX2_MQ(y, r, F);
+            for (j = 0; j < N; j++) {
+                x[j] = (gf31)(alpha * r[j] - t[j] + 31);
+            }
+            /* y and e hold M elements, so this loop is bounded by M (the
+               original used N, which is only correct because M == N). */
+            for (j = 0; j < M; j++) {
+                y[j] = (gf31)(alpha * y[j] - e[j] + 31);
+            }
+            PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(x, x);
+            PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(y, y);
+            PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, x, N);
+            PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, y, M);
+            com_0(c + HASH_BYTES * (2 * i + 0), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0, packbuf1);
+        } else {
+            /* Rebuild c[2i+1] = com_1(rho, r, alpha * (v - MQ(r)) - G(t1, r) - e1). */
+            PQCLEAN_MQDSS48_AVX2_MQ(y, r, F);
+            PQCLEAN_MQDSS48_AVX2_G(z, t, r, F);
+            /* All operands hold M elements; bounded by M as above. */
+            for (j = 0; j < M; j++) {
+                y[j] = (gf31)(alpha * (31 + pk_gf31[j] - y[j]) - z[j] - e[j] + 62);
+            }
+            PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(y, y);
+            PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, y, M);
+            com_1(c + HASH_BYTES * (2 * i + 1), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0);
+        }
+        /* The other commitment of this round is taken from the signature. */
+        memcpy(c + HASH_BYTES * (2 * i + (1 - b)), sig + NPACKED_BYTES, HASH_BYTES);
+        sig += NPACKED_BYTES + 2 * HASH_BYTES;
+    }
+    shake256_ctx_release(&shakestate);
+
+    /* Recompute sigma_0 over all commitments and compare with the signed one. */
+    H(c, c, HASH_BYTES * ROUNDS * 2);
+    if (memcmp(c, sigma0, HASH_BYTES) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * Returns an array containing the signature followed by the message.
+ *
+ * sm must have room for SIG_LEN + mlen bytes; *smlen receives the total
+ * length written. m may overlap sm. Returns the status of the underlying
+ * signature routine (0 on success).
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign(
+    uint8_t *sm, size_t *smlen,
+    const uint8_t *m, size_t mlen, const uint8_t *sk) {
+    size_t siglen;
+    int ret;
+
+    /* Move the message to its final position first. The signature routine
+       writes into sm[0..SIG_LEN) while it is still reading the message, so
+       signing straight from m would corrupt the input whenever m overlaps
+       that region. */
+    memmove(sm + SIG_LEN, m, mlen);
+
+    ret = PQCLEAN_MQDSS48_AVX2_crypto_sign_signature(
+              sm, &siglen, sm + SIG_LEN, mlen, sk);
+
+    *smlen = siglen + mlen;
+
+    return ret;
+}
+
+/**
+ * Verifies a given signature-message pair under a given public key.
+ *
+ * sm holds SIG_LEN signature bytes followed by the message. On success the
+ * message is copied to m, its length stored in *mlen, and 0 is returned. On
+ * failure smlen bytes of m are zeroed, *mlen is set to 0 and -1 is returned.
+ */
+int PQCLEAN_MQDSS48_AVX2_crypto_sign_open(
+    uint8_t *m, size_t *mlen,
+    const uint8_t *sm, size_t smlen, const uint8_t *pk) {
+    /* The caller may not know the signature size, but MQDSS signatures are
+       always exactly SIG_LEN bytes, so anything shorter cannot be valid. */
+    int accepted = (smlen >= SIG_LEN);
+
+    if (accepted) {
+        *mlen = smlen - SIG_LEN;
+        accepted = (PQCLEAN_MQDSS48_AVX2_crypto_sign_verify(
+                        sm, SIG_LEN, sm + SIG_LEN, *mlen, pk) == 0);
+    }
+
+    if (!accepted) {
+        /* Shared failure path: wipe the output buffer, report no message. */
+        memset(m, 0, smlen);
+        *mlen = 0;
+        return -1;
+    }
+
+    /* Verification succeeded: move the message to the right place. */
+    memmove(m, sm + SIG_LEN, *mlen);
+
+    return 0;
+}
diff --git a/crypto_sign/mqdss-48/clean/sign.c b/crypto_sign/mqdss-48/clean/sign.c
index fe4f2b75..57b11d66 100644
--- a/crypto_sign/mqdss-48/clean/sign.c
+++ b/crypto_sign/mqdss-48/clean/sign.c
@@ -1,4 +1,3 @@
-#include
#include
#include
#include
diff --git a/crypto_sign/mqdss-64/META.yml b/crypto_sign/mqdss-64/META.yml
index 28feaa8c..be2a6a1f 100644
--- a/crypto_sign/mqdss-64/META.yml
+++ b/crypto_sign/mqdss-64/META.yml
@@ -16,3 +16,12 @@ auxiliary-submitters:
implementations:
- name: clean
version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1
+ - name: avx2
+ version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1
+ supported_platforms:
+ - architecture: x86_64
+ required_flags:
+ - avx2
+ - architecture: x86
+ required_flags:
+ - avx2
diff --git a/crypto_sign/mqdss-64/avx2/LICENSE b/crypto_sign/mqdss-64/avx2/LICENSE
new file mode 100644
index 00000000..670154e3
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/LICENSE
@@ -0,0 +1,116 @@
+CC0 1.0 Universal
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display, communicate,
+ and translate a Work;
+
+ ii. moral rights retained by the original author(s) and/or performer(s);
+
+ iii. publicity and privacy rights pertaining to a person's image or likeness
+ depicted in a Work;
+
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+
+ v. rights protecting the extraction, dissemination, use and reuse of data in
+ a Work;
+
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+
+ vii. other similar, equivalent or corresponding rights throughout the world
+ based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+
+ b. Affirmer offers the Work as-is and makes no representations or warranties
+ of any kind concerning the Work, express, implied, statutory or otherwise,
+ including without limitation warranties of title, merchantability, fitness
+ for a particular purpose, non infringement, or the absence of latent or
+ other defects, accuracy, or the present or absence of errors, whether or not
+ discoverable, all to the greatest extent permissible under applicable law.
+
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
diff --git a/crypto_sign/mqdss-64/avx2/Makefile b/crypto_sign/mqdss-64/avx2/Makefile
new file mode 100644
index 00000000..940ebbd4
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/Makefile
@@ -0,0 +1,22 @@
+# This Makefile can be used with GNU Make or BSD Make
+
+LIB=libmqdss-64_avx2.a
+
+HEADERS = params.h gf31.h mq.h api.h
+OBJECTS = gf31.o mq.o sign.o
+
+CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror \
+ -Wmissing-prototypes -Wredundant-decls -std=c99 -mavx2 \
+ -I../../../common $(EXTRAFLAGS)
+
+all: $(LIB)
+
+%.o: %.c $(HEADERS)
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+$(LIB): $(OBJECTS)
+ $(AR) -r $@ $(OBJECTS)
+
+clean:
+ $(RM) $(OBJECTS)
+ $(RM) $(LIB)
diff --git a/crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake b/crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake
new file mode 100644
index 00000000..9a3e768c
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/Makefile.Microsoft_nmake
@@ -0,0 +1,19 @@
+# This Makefile can be used with Microsoft Visual Studio's nmake using the command:
+# nmake /f Makefile.Microsoft_nmake
+
+# Output library, named after this (avx2) implementation to match LIB in the
+# GNU Makefile of the same directory.
+LIBRARY=libmqdss-64_avx2.lib
+OBJECTS=gf31.obj mq.obj sign.obj
+
+CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /arch:AVX2
+
+all: $(LIBRARY)
+
+# Make sure objects are recompiled if headers change.
+$(OBJECTS): *.h
+
+$(LIBRARY): $(OBJECTS)
+ LIB.EXE /NOLOGO /WX /OUT:$@ $**
+
+clean:
+ -DEL $(OBJECTS)
+ -DEL $(LIBRARY)
diff --git a/crypto_sign/mqdss-64/avx2/api.h b/crypto_sign/mqdss-64/avx2/api.h
new file mode 100644
index 00000000..91326baa
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/api.h
@@ -0,0 +1,47 @@
+#ifndef PQCLEAN_MQDSS64_AVX2_API_H
+#define PQCLEAN_MQDSS64_AVX2_API_H
+
+#include
+#include
+
+/* Algorithm name string for this parameter set. */
+#define PQCLEAN_MQDSS64_AVX2_CRYPTO_ALGNAME "MQDSS-64"
+
+/* Secret key, public key and signature sizes; presumably in bytes, going by
+ the *BYTES naming (confirm against this implementation's params.h). */
+#define PQCLEAN_MQDSS64_AVX2_CRYPTO_SECRETKEYBYTES 24
+#define PQCLEAN_MQDSS64_AVX2_CRYPTO_PUBLICKEYBYTES 64
+#define PQCLEAN_MQDSS64_AVX2_CRYPTO_BYTES 59928
+
+/*
+ * Generates an MQDSS key pair.
+ * pk and sk are output buffers for the public and secret key.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_keypair(
+ uint8_t *pk, uint8_t *sk);
+
+/**
+ * Returns an array containing a detached signature.
+ * The signature over m[0..mlen) is written to sig; presumably its length is
+ * stored in *siglen (confirm against sign.c).
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_signature(
+ uint8_t *sig, size_t *siglen,
+ const uint8_t *m, size_t mlen, const uint8_t *sk);
+
+/**
+ * Verifies a detached signature and message under a given public key.
+ * Presumably returns 0 when the signature is valid and non-zero otherwise
+ * (confirm against sign.c).
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_verify(
+ const uint8_t *sig, size_t siglen,
+ const uint8_t *m, size_t mlen, const uint8_t *pk);
+
+/**
+ * Returns an array containing the signature followed by the message.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign(
+ uint8_t *sm, size_t *smlen,
+ const uint8_t *m, size_t mlen, const uint8_t *sk);
+
+/**
+ * Verifies a given signature-message pair under a given public key.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_open(
+ uint8_t *m, size_t *mlen,
+ const uint8_t *sm, size_t smlen, const uint8_t *pk);
+
+#endif
diff --git a/crypto_sign/mqdss-64/avx2/gf31.c b/crypto_sign/mqdss-64/avx2/gf31.c
new file mode 100644
index 00000000..5f65eb77
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/gf31.c
@@ -0,0 +1,128 @@
+#include "params.h"
+#include "fips202.h"
+#include "gf31.h"
+#include
+#include
+#include
+#include
+
+/* Maps a vector of N elements from the range [0, 31] to the unique range
+   [0, 30] by replacing every 31 with 0 (i.e. reduction mod 31). Processes 16
+   elements per step using unaligned 256-bit loads and stores. */
+void PQCLEAN_MQDSS64_AVX2_vgf31_unique(gf31 *out, gf31 *in) {
+    const __m256i thirtyone = _mm256_set1_epi16(31);
+    int blk;
+
+    for (blk = 0; blk < (N >> 4); ++blk) {
+        __m256i v = _mm256_loadu_si256((__m256i const *)(in + 16 * blk));
+        /* Lanes equal to 31 compare to all-ones; masking with 31 gives a
+           value that XORs exactly those lanes down to 0. */
+        __m256i is31 = _mm256_cmpeq_epi16(v, thirtyone);
+        __m256i flip = _mm256_and_si256(thirtyone, is31);
+        _mm256_storeu_si256((__m256i *)(out + 16 * blk), _mm256_xor_si256(v, flip));
+    }
+}
+
+/* Acts on a vector of N (64) gf31 elements. Performs one reduction step
+   followed by the 31 -> 0 mapping, which guarantees output in [0, 30], but
+   requires the input to be in [0, 32768). */
+void PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in) {
+    /* 2114 in every 16-bit lane (same bit pattern as the 32-bit constant
+       2114 * 65536 + 2114 in every 32-bit lane). */
+    const __m256i magic = _mm256_set1_epi16(2114);
+    const __m256i thirtyone = _mm256_set1_epi16(31);
+    int blk;
+
+    for (blk = 0; blk < (N >> 4); ++blk) {
+        __m256i v = _mm256_loadu_si256((__m256i const *)(in + 16 * blk));
+        /* quotient = high half of v * 2114; subtract 31 * quotient. */
+        __m256i quotient = _mm256_mulhi_epi16(v, magic);
+        __m256i is31;
+        v = _mm256_sub_epi16(v, _mm256_mullo_epi16(thirtyone, quotient));
+        /* Fold a remaining 31 to 0. */
+        is31 = _mm256_cmpeq_epi16(v, thirtyone);
+        v = _mm256_xor_si256(v, _mm256_and_si256(thirtyone, is31));
+        _mm256_storeu_si256((__m256i *)(out + 16 * blk), v);
+    }
+}
+
+/* Samples len gf31 elements in the range [0, 30] from SHAKE-256 keyed with
+   the given seed and stores them as 16-bit values in out. The low 5 bits of
+   each output byte are used; the value 31 is rejected. */
+void PQCLEAN_MQDSS64_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen) {
+    shake256ctx xof;
+    uint8_t block[SHAKE256_RATE];
+    size_t produced = 0;
+    size_t pos;
+
+    shake256_absorb(&xof, seed, seedlen);
+
+    while (produced < len) {
+        shake256_squeezeblocks(block, 1, &xof);
+        for (pos = 0; pos < SHAKE256_RATE && produced < len; pos++) {
+            uint8_t candidate = (uint8_t)(block[pos] & 31);
+            if (candidate == 31) {
+                continue;  /* rejection sampling */
+            }
+            out[produced] = candidate;
+            produced++;
+        }
+    }
+    shake256_ctx_release(&xof);
+}
+
+/* Samples len gf31 elements from SHAKE-256 keyed with the given seed, shifted
+   into signed range, i.e. [-15, 15], and stores them in an array of 8-bit
+   integers. This is used for the expansion of F, which wants packed elements.
+   The low 5 bits of each output byte are used; the value 31 is rejected. */
+void PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen) {
+    shake256ctx xof;
+    uint8_t block[SHAKE256_RATE];
+    size_t produced = 0;
+    size_t pos;
+
+    shake256_absorb(&xof, seed, seedlen);
+
+    while (produced < len) {
+        shake256_squeezeblocks(block, 1, &xof);
+        for (pos = 0; pos < SHAKE256_RATE && produced < len; pos++) {
+            int candidate = block[pos] & 31;
+            if (candidate == 31) {
+                continue;  /* rejection sampling */
+            }
+            /* [0, 30] -> [-15, 15] */
+            out[produced] = (signed char)(candidate - 15);
+            produced++;
+        }
+    }
+    shake256_ctx_release(&xof);
+}
+
+/* Unpacks an array of packed GF31 elements to one element per gf31.
+ Assumes that there is sufficient empty space available at the end of the
+ array to unpack. Can perform in-place.
+ Reads (n * 5) >> 3 input bytes and writes n elements, both from the last
+ index down to the first.
+ NOTE(review): the byte index only stays in range when n * 5 is a multiple of
+ 8, i.e. n is a multiple of 8 - confirm that all callers guarantee this. */
+void PQCLEAN_MQDSS64_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n) {
+ size_t i;
+ /* Index of the last packed byte. */
+ size_t j = ((n * 5) >> 3) - 1;
+ /* Bit offset inside in[j] at which the current element starts. */
+ unsigned int d = 0;
+
+ for (i = n; i > 0; i--) {
+ /* Bits of the element that live in the current byte. */
+ out[i - 1] = (gf31)((in[j] >> d) & 31);
+ d += 5;
+ if (d > 8) {
+ /* The element straddles a byte boundary: step to the previous byte and
+ merge in the bits stored there. */
+ d -= 8;
+ j--;
+ out[i - 1] = (gf31)(out[i - 1] ^ ((in[j] << (5 - d)) & 31));
+ }
+ /* When d == 8 exactly, the next iteration shifts all bits out, reads 0
+ from this byte and takes the whole element from the previous byte via
+ the branch above. */
+ }
+}
+
+/* Packs an array of n GF31 elements from gf31's to concatenated 5-bit values.
+   Assumes that there is sufficient space available in out to pack, i.e.
+   ceil(5n / 8) bytes. Every input element must be in unique representation
+   (< 31); this is checked with assert.
+   NOTE: out is zeroed before in is read, so out must not alias in. */
+void PQCLEAN_MQDSS64_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n) {
+    size_t i = 0;  /* index of the output byte currently being filled */
+    size_t j;
+    int d = 3;     /* left shift placing the current element in out[i] */
+
+    for (j = 0; j < n; j++) {
+        assert(in[j] < 31);
+    }
+
+    /* There will be ceil(5n / 8) output blocks. The length is computed
+       entirely in size_t: the previous `& ~7U` mask was a 32-bit constant
+       and cleared the upper bits of the length on 64-bit platforms. */
+    memset(out, 0, (5 * n + 7) >> 3);
+
+    for (j = 0; j < n; j++) {
+        if (d < 0) {
+            /* The element straddles a byte boundary: its high bits complete
+               the low end of out[i], the rest goes into the next byte. */
+            d += 8;
+            out[i] = (uint8_t)((out[i] & (255 << (d - 3))) |
+                               ((in[j] >> (8 - d)) & ~(255 << (d - 3))));
+            i++;
+        }
+        out[i] = (uint8_t)((out[i] & ~(31 << d)) | ((in[j] << d) & (31 << d)));
+        d -= 5;
+    }
+}
diff --git a/crypto_sign/mqdss-64/avx2/gf31.h b/crypto_sign/mqdss-64/avx2/gf31.h
new file mode 100644
index 00000000..556df9be
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/gf31.h
@@ -0,0 +1,36 @@
+#ifndef MQDSS_GF31_H
+#define MQDSS_GF31_H
+
+#include
+#include
+
+/* A GF(31) element, stored in a 16-bit unsigned integer. */
+typedef unsigned short gf31;
+
+/* Given a vector of elements in the range [0, 31], this reduces the elements
+ to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */
+void PQCLEAN_MQDSS64_AVX2_vgf31_unique(gf31 *out, gf31 *in);
+
+/* Given a vector of 16-bit integers, this reduces the elements mod 31 to the
+ range [0, 30].
+ NOTE(review): the implementation in gf31.c documents its valid input range
+ as [0, 32768), not the full 16-bit range [0, 65535] - confirm. */
+void PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in);
+
+/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places
+ them in a vector of 16-bit elements */
+void PQCLEAN_MQDSS64_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen);
+
+/* Given a seed, samples len gf31 elements, transposed into signed range,
+ i.e. in the range [-15, 15], and places them in an array of 8-bit integers.
+ This is used for the expansion of F, which wants packed elements. */
+void PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen);
+
+/* Unpacks an array of packed GF31 elements to one element per gf31.
+ Assumes that there is sufficient empty space available at the end of the
+ array to unpack. Can perform in-place. */
+void PQCLEAN_MQDSS64_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n);
+
+/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values.
+ Assumes that there is sufficient space available to pack.
+ NOTE(review): the implementation in gf31.c zeroes out before reading in, so
+ in-place use looks unsafe despite the earlier "can perform in-place" wording
+ - confirm. */
+void PQCLEAN_MQDSS64_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n);
+
+#endif
diff --git a/crypto_sign/mqdss-64/avx2/mq.c b/crypto_sign/mqdss-64/avx2/mq.c
new file mode 100644
index 00000000..b44e85fe
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/mq.c
@@ -0,0 +1,239 @@
+#include "mq.h"
+#include "params.h"
+#include <immintrin.h>
+#include <stdint.h>
+
+static inline __m256i reduce_16(__m256i r, __m256i _w31, __m256i _w2114) { /* Per signed 16-bit lane: returns r - _w31 * hi16(r * _w2114). Callers in this file pass broadcast 31 and 2114, so the result stays congruent to r mod 31. */
+ __m256i exp = _mm256_mulhi_epi16(r, _w2114); /* high 16 bits of the signed product; 2114 is about 65536 / 31, so this approximates r / 31 */
+ return _mm256_sub_epi16(r, _mm256_mullo_epi16(_w31, exp)); /* subtract that multiple of _w31 from r */
+}
+
+/* Computes all products x_i * x_j with j <= i, reduced via reduce_16 and narrowed to bytes; row i starts at the running offset k in xij and has i + 1 entries */
+inline static
+void generate_quadratic_terms( unsigned char *xij, const gf31 *x ) {
+ __m256i mask_2114 = _mm256_set1_epi16( 2114 );
+ __m256i mask_31 = _mm256_set1_epi16( 31 );
+ __m256i xi[4]; /* the four loads below read x[0..63], i.e. they are written for N == 64 */
+ xi[0] = _mm256_loadu_si256((__m256i const *) (x));
+ xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16));
+ xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32));
+ xi[3] = _mm256_loadu_si256((__m256i const *) (x + 48));
+
+ __m256i xixj[4]; /* zeroed so that blocks not yet computed in the loops below pack to zero bytes */
+ xixj[0] = _mm256_setzero_si256();
+ xixj[1] = _mm256_setzero_si256();
+ xixj[2] = _mm256_setzero_si256();
+ xixj[3] = _mm256_setzero_si256();
+
+ int k = 0; /* byte offset of the current row inside xij */
+ for (int i = 0; i < 32; i++) { /* rows that fit in a single 32-byte store */
+ __m256i br_xi = _mm256_set1_epi16( (short)x[i] ); /* x[i] broadcast to all 16 lanes */
+ for (int j = 0; j <= (i >> 4); j++) { /* only the 16-element blocks that reach up to index i */
+ xixj[j] = _mm256_mullo_epi16( xi[j], br_xi );
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); /* narrow the 16-bit lanes to bytes */
+ r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0: packs works per 128-bit half, so reorder the 64-bit quarters back into element order
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r ); /* full 32-byte store; bytes beyond the first i + 1 are overwritten by the next row's store */
+ k += i + 1;
+ }
+
+ for (int i = 32; i < N; i++) { /* rows longer than 32 entries need two 32-byte stores */
+ __m256i br_xi = _mm256_set1_epi16( (short)x[i] );
+ for (int j = 0; j <= (i >> 4); j++) {
+ xixj[j] = _mm256_mullo_epi16( xi[j], br_xi );
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]);
+ r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r0 );
+ __m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]);
+ r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 );
+ k += i + 1; /* after the last row k equals N * (N + 1) / 2 */
+ }
+}
+
+/* Computes all terms (x_i * y_j) + (x_j * y_i) with j <= i, reduced via reduce_16 and narrowed to bytes; same row layout in xij as generate_quadratic_terms */
+inline static
+void generate_xiyj_p_xjyi_terms( unsigned char *xij, const gf31 *x, const gf31 *y ) {
+ __m256i mask_2114 = _mm256_set1_epi16( 2114 );
+ __m256i mask_31 = _mm256_set1_epi16( 31 );
+ __m256i xiyi[4]; /* each 16-bit lane ends up with x_j in the low byte and y_j in the high byte (assumes every element fits in one byte) */
+ xiyi[0] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y)), 1 ));
+ xiyi[1] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 16)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 16)), 1 ));
+ xiyi[2] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 32)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 32)), 1 ));
+ xiyi[3] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 48)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 48)), 1 ));
+
+ __m256i xixj[4];
+ xixj[0] = _mm256_setzero_si256();
+ xixj[1] = _mm256_setzero_si256();
+ xixj[2] = _mm256_setzero_si256();
+ xixj[3] = _mm256_setzero_si256();
+
+ int k = 0; /* byte offset of the current row inside xij */
+ for (int i = 0; i < 32; i++) { /* rows that fit in a single 32-byte store */
+ __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); /* y_i in the low byte, x_i in the high byte, broadcast to all lanes */
+ for (int j = 0; j <= (i >> 4); j++) {
+ xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); /* per lane: x_j * y_i + y_j * x_i */
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); /* narrow the 16-bit lanes to bytes */
+ r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0: reorder the 64-bit quarters back into element order after the per-half pack
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r );
+ k += i + 1;
+ }
+
+ for (int i = 32; i < N; i++) { /* rows longer than 32 entries need two 32-byte stores */
+ __m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) );
+ for (int j = 0; j <= (i >> 4); j++) {
+ xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi );
+ xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 );
+ }
+
+ __m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]);
+ r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + k ), r0 );
+ __m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]);
+ r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0
+ _mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 );
+ k += i + 1;
+ }
+}
+
+#define EVAL_YMM_0(xx) { /* Low 128 bits of xx: for each of its 8 16-bit words, multiply-accumulates the word against N/16 consecutive 32-byte blocks of F into yy[]. Uses and advances F, and updates yy, from the enclosing scope. */ \
+ __m128i tmp = _mm256_castsi256_si128(xx); \
+ for (int macro_i = 0; macro_i < 8; macro_i++) { \
+ __m256i _xi = _mm256_broadcastw_epi16(tmp); /* lowest word of tmp in every lane */ \
+ tmp = _mm_srli_si128(tmp, 2); /* move the next word into the lowest position */ \
+ for (int macro_j = 0; macro_j < (N/16); macro_j++) { \
+ __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \
+ F += 32; \
+ yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); /* _xi bytes are taken as unsigned, coeff bytes as signed */ \
+ } \
+ } \
+ }
+
+#define EVAL_YMM_1(xx) { /* Same as EVAL_YMM_0, but processes the high 128 bits of xx. Uses and advances F, and updates yy, from the enclosing scope. */ \
+ __m128i tmp = _mm256_extracti128_si256(xx, 1); \
+ for (int macro_i = 0; macro_i < 8; macro_i++) { \
+ __m256i _xi = _mm256_broadcastw_epi16(tmp); /* lowest word of tmp in every lane */ \
+ tmp = _mm_srli_si128(tmp, 2); /* move the next word into the lowest position */ \
+ for (int macro_j = 0; macro_j < (N/16); macro_j++) { \
+ __m256i coeff = _mm256_loadu_si256((__m256i const *) F); \
+ F += 32; \
+ yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \
+ } \
+ } \
+ }
+
+#define REDUCE_(yy) { /* Applies reduce_16 to the four accumulators yy[0..3]; relies on mask_reduce and mask_2114 being defined in the enclosing scope. */ \
+ (yy)[0] = reduce_16((yy)[0], mask_reduce, mask_2114); \
+ (yy)[1] = reduce_16((yy)[1], mask_reduce, mask_2114); \
+ (yy)[2] = reduce_16((yy)[2], mask_reduce, mask_2114); \
+ (yy)[3] = reduce_16((yy)[3], mask_reduce, mask_2114); \
+ }
+
+
+/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be
+ in reduced 5-bit representation). Expects the coefficients in F to be in
+ signed representation (i.e. [-15, 15], packed bytewise).
+ Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS64_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F) {
+ __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); /* 2114 in every 16-bit lane */
+ __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); /* all-ones lanes shifted right by 11: 31 in every 16-bit lane */
+
+ __m256i xi[4]; /* x[0..63], 16 elements per register */
+ xi[0] = _mm256_loadu_si256((__m256i const *) (x));
+ xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16));
+ xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32));
+ xi[3] = _mm256_loadu_si256((__m256i const *) (x + 48));
+
+ __m256i _zero = _mm256_setzero_si256();
+ xi[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[0])), xi[0]); /* add 31 to every lane that is negative as a signed 16-bit value */
+ xi[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[1])), xi[1]);
+ xi[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[2])), xi[2]);
+ xi[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[3])), xi[3]);
+
+ __m256i x1 = _mm256_packs_epi16(xi[0], xi[1]); /* x[0..31] narrowed to bytes */
+ x1 = _mm256_permute4x64_epi64(x1, 0xd8); // 3,1,2,0
+ __m256i x2 = _mm256_packs_epi16(xi[2], xi[3]); /* x[32..63] narrowed to bytes */
+ x2 = _mm256_permute4x64_epi64(x2, 0xd8); // 3,1,2,0
+
+ __m256i yy[M / 16]; /* 16-bit accumulators for the M outputs */
+ yy[0] = _zero;
+ yy[1] = _zero;
+ yy[2] = _zero;
+ yy[3] = _zero;
+
+ EVAL_YMM_0(x1) /* linear part: these four evaluations consume the first N * M bytes of F */
+ EVAL_YMM_1(x1)
+ EVAL_YMM_0(x2)
+ EVAL_YMM_1(x2)
+ REDUCE_(yy)
+
+ __m256i xixj[65]; /* 65 * 32 = 2080 bytes, i.e. N * (N + 1) / 2 byte-sized products */
+ generate_quadratic_terms( (unsigned char *) xixj, x );
+ for (int i = 0 ; i < 64 ; i += 2) { /* two registers of products per reduction; presumably spaced to keep the 16-bit accumulators from overflowing */
+ EVAL_YMM_0(xixj[i])
+ EVAL_YMM_1(xixj[i])
+ EVAL_YMM_0(xixj[i + 1])
+ EVAL_YMM_1(xixj[i + 1])
+ REDUCE_(yy)
+ }
+ EVAL_YMM_0(xixj[64]) /* the 65th register is left over after the pairwise loop */
+ EVAL_YMM_1(xixj[64])
+ REDUCE_(yy)
+
+ yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); /* add 31 to negative lanes so the stored values are non-negative */
+ yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]);
+ yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]);
+ yy[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[3])), yy[3]);
+
+ for (int i = 0; i < (N / 16); ++i) { /* bound uses N while yy is sized by M; the two agree because params.h defines M as N */
+ _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]);
+ }
+}
+
+/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of
+ N gf31 elements x (expected to be in reduced 5-bit representation). Expects
+ the coefficients in F to be in signed representation (i.e. [-15, 15], packed
+ bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS64_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F) {
+ __m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); /* 2114 in every 16-bit lane */
+ __m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); /* 31 in every 16-bit lane */
+ __m256i _zero = _mm256_setzero_si256();
+
+ __m256i yy[(M / 16)]; /* 16-bit accumulators for the M outputs */
+ yy[0] = _zero;
+ yy[1] = _zero;
+ yy[2] = _zero;
+ yy[3] = _zero;
+
+ F += N * M; /* skip the N * M coefficients that MQ uses for its linear part; only the remaining coefficients are read here */
+
+ __m256i xixj[65]; /* 65 * 32 = 2080 bytes, i.e. N * (N + 1) / 2 byte-sized terms */
+ generate_xiyj_p_xjyi_terms( (unsigned char *) xixj, x, y );
+ for (int i = 0 ; i < 64 ; i += 2) { /* two registers of terms per reduction, as in MQ */
+ EVAL_YMM_0(xixj[i])
+ EVAL_YMM_1(xixj[i])
+ EVAL_YMM_0(xixj[i + 1])
+ EVAL_YMM_1(xixj[i + 1])
+ REDUCE_(yy)
+ }
+ EVAL_YMM_0(xixj[64]) /* the 65th register is left over after the pairwise loop */
+ EVAL_YMM_1(xixj[64])
+ REDUCE_(yy)
+
+ yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); /* add 31 to negative lanes so the stored values are non-negative */
+ yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]);
+ yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]);
+ yy[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[3])), yy[3]);
+
+ for (int i = 0; i < (N / 16); ++i) { /* bound uses N while yy is sized by M; the two agree because params.h defines M as N */
+ _mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]);
+ }
+}
diff --git a/crypto_sign/mqdss-64/avx2/mq.h b/crypto_sign/mqdss-64/avx2/mq.h
new file mode 100644
index 00000000..179555a3
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/mq.h
@@ -0,0 +1,18 @@
+#ifndef MQDSS_MQ_H
+#define MQDSS_MQ_H
+
+#include "gf31.h"
+
+/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be
+ in reduced 5-bit representation). Expects the coefficients in F to be in
+ signed representation (i.e. [-15, 15], packed bytewise).
+ Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS64_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F);
+
+/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of
+ N gf31 elements x (expected to be in reduced 5-bit representation). Expects
+ the coefficients in F to be in signed representation (i.e. [-15, 15], packed
+ bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */
+void PQCLEAN_MQDSS64_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F);
+
+#endif
diff --git a/crypto_sign/mqdss-64/avx2/params.h b/crypto_sign/mqdss-64/avx2/params.h
new file mode 100644
index 00000000..d0278f01
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/params.h
@@ -0,0 +1,25 @@
+#ifndef MQDSS_PARAMS_H
+#define MQDSS_PARAMS_H
+
+#define N 64 /* number of gf31 elements in the input vector of the MQ function */
+#define M N /* number of gf31 elements the MQ function outputs */
+#define F_LEN (M * (((N * (N + 1)) >> 1) + N)) /* Number of elements in F: per output, N * (N + 1) / 2 quadratic plus N linear coefficients */
+
+#define ROUNDS 277 /* number of rounds the signing and verification loops iterate over */
+
+/* Number of bytes that N, M and F_LEN elements require when packed into a byte
+ array, 5-bit elements packed continuously. */
+/* Assumes N and M to be multiples of 8 */
+#define NPACKED_BYTES ((N * 5) >> 3)
+#define MPACKED_BYTES ((M * 5) >> 3)
+#define FPACKED_BYTES ((F_LEN * 5) >> 3)
+
+#define HASH_BYTES 48 /* length of every hash and commitment output */
+#define SEED_BYTES 24 /* length of the secret key and of each seed derived from it */
+#define PK_BYTES (SEED_BYTES + MPACKED_BYTES) /* seed for F followed by the packed public vector */
+#define SK_BYTES SEED_BYTES
+
+// R, sigma_0, ROUNDS * (t1, r{0,1}, e1, c, rho)
+#define SIG_LEN (2 * HASH_BYTES + ROUNDS * (2*NPACKED_BYTES + MPACKED_BYTES + HASH_BYTES + HASH_BYTES))
+
+#endif
diff --git a/crypto_sign/mqdss-64/avx2/sign.c b/crypto_sign/mqdss-64/avx2/sign.c
new file mode 100644
index 00000000..47940a5b
--- /dev/null
+++ b/crypto_sign/mqdss-64/avx2/sign.c
@@ -0,0 +1,389 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "api.h"
+#include "fips202.h"
+#include "gf31.h"
+#include "mq.h"
+#include "params.h"
+#include "randombytes.h"
+
+/* Hashes the len bytes at in with shake256 and writes a HASH_BYTES digest to out.
+ This is used as a hash function in the Fiat-Shamir transform. */
+static void H(unsigned char *out, const unsigned char *in, const size_t len) {
+ shake256(out, HASH_BYTES, in, len);
+}
+
+/* Takes HASH_BYTES of randomness rho, two arrays of N packed elements and an
+ array of M packed elements, and computes a HASH_BYTES commitment into c. */
+static void com_0(unsigned char *c,
+ const unsigned char *rho,
+ const unsigned char *inn, const unsigned char *inn2,
+ const unsigned char *inm) {
+ unsigned char buffer[HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES]; /* layout: rho || inn || inn2 || inm */
+ memcpy(buffer, rho, HASH_BYTES);
+ memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES);
+ memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inn2, NPACKED_BYTES);
+ memcpy(buffer + HASH_BYTES + 2 * NPACKED_BYTES, inm, MPACKED_BYTES);
+ shake256(c, HASH_BYTES, buffer, HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES); /* hash the whole concatenation */
+}
+
+/* Takes HASH_BYTES of randomness rho, an array of N packed elements and an
+ array of M packed elements, and computes a HASH_BYTES commitment into c. */
+static void com_1(unsigned char *c,
+ const unsigned char *rho,
+ const unsigned char *inn, const unsigned char *inm) {
+ unsigned char buffer[HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES]; /* layout: rho || inn || inm */
+ memcpy(buffer, rho, HASH_BYTES);
+ memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES);
+ memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inm, MPACKED_BYTES);
+ shake256(c, HASH_BYTES, buffer, HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES); /* hash the whole concatenation */
+}
+
+/*
+ * Generates an MQDSS key pair: sk receives SEED_BYTES random bytes, pk receives the seed for F followed by the packed MQ output. Always returns 0.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) {
+ signed char F[F_LEN];
+ unsigned char skbuf[SEED_BYTES * 2];
+ gf31 sk_gf31[N];
+ gf31 pk_gf31[M];
+
+ // Expand sk to obtain a seed for F and the secret input s.
+ // We also expand to obtain a value for sampling r0, t0 and e0 during
+ // signature generation, but that is not relevant here.
+ randombytes(sk, SEED_BYTES);
+ shake256(skbuf, SEED_BYTES * 2, sk, SEED_BYTES);
+
+ memcpy(pk, skbuf, SEED_BYTES); /* first SEED_BYTES of the expansion: the seed for F, published in pk */
+ PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES);
+ PQCLEAN_MQDSS64_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); /* second SEED_BYTES of the expansion seed the secret vector */
+ PQCLEAN_MQDSS64_AVX2_MQ(pk_gf31, sk_gf31, F); /* public vector = MQ function evaluated at the secret vector */
+ PQCLEAN_MQDSS64_AVX2_vgf31_unique(pk_gf31, pk_gf31);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M);
+
+ return 0;
+}
+
+/**
+ * Writes a detached signature of m (mlen bytes) under sk to sig and sets *siglen to SIG_LEN. Always returns 0.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_signature(
+ uint8_t *sig, size_t *siglen,
+ const uint8_t *m, size_t mlen, const uint8_t *sk) {
+
+ signed char F[F_LEN];
+ unsigned char skbuf[SEED_BYTES * 4];
+ gf31 pk_gf31[M];
+ unsigned char pk[SEED_BYTES + MPACKED_BYTES];
+ // Concatenated for convenient hashing.
+ unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)];
+ unsigned char *D = D_sigma0_h0_sigma1;
+ unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES;
+ unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES;
+ unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES;
+ unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES;
+ shake256ctx shakestate;
+ unsigned char shakeblock[SHAKE256_RATE];
+ unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; /* ROUNDS bits, rounded up to whole bytes */
+ unsigned char rnd_seed[HASH_BYTES + SEED_BYTES];
+ unsigned char rho[2 * ROUNDS * HASH_BYTES];
+ unsigned char *rho0 = rho;
+ unsigned char *rho1 = rho + ROUNDS * HASH_BYTES;
+ gf31 sk_gf31[N];
+ gf31 rnd[(2 * N + M) * ROUNDS]; // Concatenated for easy RNG.
+ gf31 *r0 = rnd;
+ gf31 *t0 = rnd + N * ROUNDS;
+ gf31 *e0 = rnd + 2 * N * ROUNDS;
+ gf31 r1[N * ROUNDS];
+ gf31 t1[N * ROUNDS];
+ gf31 e1[M * ROUNDS];
+ gf31 gx[M * ROUNDS];
+ unsigned char packbuf0[NPACKED_BYTES];
+ unsigned char packbuf1[NPACKED_BYTES];
+ unsigned char packbuf2[MPACKED_BYTES];
+ unsigned char c[HASH_BYTES * ROUNDS * 2]; /* two commitments per round: slot 2i from com_0, slot 2i + 1 from com_1 */
+ gf31 alpha;
+ int alpha_count = 0;
+ int b;
+ int i, j;
+ shake256incctx state;
+
+ shake256(skbuf, SEED_BYTES * 4, sk, SEED_BYTES); /* four seeds, used below for F, the secret vector, rho, and (r0, t0, e0) */
+
+ PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, skbuf, SEED_BYTES);
+
+ shake256_inc_init(&state);
+ shake256_inc_absorb(&state, sk, SEED_BYTES);
+ shake256_inc_absorb(&state, m, mlen);
+ shake256_inc_finalize(&state);
+ shake256_inc_squeeze(sig, HASH_BYTES, &state); // Compute R.
+ shake256_inc_ctx_release(&state);
+
+ memcpy(pk, skbuf, SEED_BYTES); /* rebuild the public key from the secret key, same steps as in key generation */
+ PQCLEAN_MQDSS64_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES);
+ PQCLEAN_MQDSS64_AVX2_MQ(pk_gf31, sk_gf31, F);
+ PQCLEAN_MQDSS64_AVX2_vgf31_unique(pk_gf31, pk_gf31);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M);
+
+ shake256_inc_init(&state);
+ shake256_inc_absorb(&state, pk, PK_BYTES);
+ shake256_inc_absorb(&state, sig, HASH_BYTES);
+ shake256_inc_absorb(&state, m, mlen);
+ shake256_inc_finalize(&state);
+ shake256_inc_squeeze(D, HASH_BYTES, &state); /* D = hash of pk || R || m */
+ shake256_inc_ctx_release(&state);
+
+ sig += HASH_BYTES; // Compensate for prefixed R.
+
+ memcpy(rnd_seed, skbuf + 2 * SEED_BYTES, SEED_BYTES); /* third seed || D expands into rho0 and rho1 */
+ memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES);
+ shake256(rho, 2 * ROUNDS * HASH_BYTES, rnd_seed, SEED_BYTES + HASH_BYTES);
+
+ memcpy(rnd_seed, skbuf + 3 * SEED_BYTES, SEED_BYTES); /* fourth seed || D expands into r0, t0 and e0 for all rounds */
+ memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES);
+ PQCLEAN_MQDSS64_AVX2_gf31_nrand(rnd, (2 * N + M) * ROUNDS, rnd_seed, SEED_BYTES + HASH_BYTES);
+
+ for (i = 0; i < ROUNDS; i++) {
+ for (j = 0; j < N; j++) {
+ r1[j + i * N] = (gf31)(31 + sk_gf31[j] - r0[j + i * N]); /* r1 = s - r0, offset by 31 so the value is not negative */
+ }
+ PQCLEAN_MQDSS64_AVX2_G(gx + i * M, t0 + i * N, r1 + i * N, F);
+ }
+ for (i = 0; i < ROUNDS * M; i++) {
+ gx[i] = (gf31)(gx[i] + e0[i]); /* gx = G(t0, r1) + e0 */
+ }
+ for (i = 0; i < ROUNDS; i++) {
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, r0 + i * N, N);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, t0 + i * N, N);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf2, e0 + i * M, M);
+ com_0(c + HASH_BYTES * (2 * i + 0), rho0 + i * HASH_BYTES, packbuf0, packbuf1, packbuf2); /* commitment to (r0, t0, e0) */
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(r1 + i * N, r1 + i * N);
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gx + i * M, gx + i * M);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, r1 + i * N, N);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, gx + i * M, M);
+ com_1(c + HASH_BYTES * (2 * i + 1), rho1 + i * HASH_BYTES, packbuf0, packbuf1); /* commitment to (r1, G(t0, r1) + e0) */
+ }
+
+ H(sigma0, c, HASH_BYTES * ROUNDS * 2); // Compute sigma_0.
+ shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); /* absorbs D || sigma_0; the squeezed stream supplies h0 and the alpha values */
+ shake256_squeezeblocks(shakeblock, 1, &shakestate);
+
+ memcpy(h0, shakeblock, HASH_BYTES);
+
+ memcpy(sig, sigma0, HASH_BYTES);
+ sig += HASH_BYTES; // Compensate for sigma_0.
+
+ for (i = 0; i < ROUNDS; i++) {
+ do {
+ alpha = shakeblock[alpha_count] & 31; /* 5-bit candidate from the next stream byte */
+ alpha_count++;
+ if (alpha_count == SHAKE256_RATE) { /* block exhausted: squeeze a fresh one */
+ alpha_count = 0;
+ shake256_squeezeblocks(shakeblock, 1, &shakestate);
+ }
+ } while (alpha == 31); /* reject 31 so that alpha lies in [0, 30] */
+ for (j = 0; j < N; j++) {
+ t1[i * N + j] = (gf31)(alpha * r0[j + i * N] - t0[j + i * N] + 31); /* t1 = alpha * r0 - t0 */
+ }
+ PQCLEAN_MQDSS64_AVX2_MQ(e1 + i * M, r0 + i * N, F);
+ for (j = 0; j < N; j++) { /* indexes e1 with N on the left and M on the right; equivalent because M is defined as N */
+ e1[i * N + j] = (gf31)(alpha * e1[j + i * M] - e0[j + i * M] + 31); /* e1 = alpha * F(r0) - e0 */
+ }
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(t1 + i * N, t1 + i * N);
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(e1 + i * N, e1 + i * N);
+ }
+ shake256_ctx_release(&shakestate);
+
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(t1packed, t1, N * ROUNDS);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(e1packed, e1, M * ROUNDS);
+
+ memcpy(sig, t1packed, NPACKED_BYTES * ROUNDS);
+ sig += NPACKED_BYTES * ROUNDS;
+ memcpy(sig, e1packed, MPACKED_BYTES * ROUNDS);
+ sig += MPACKED_BYTES * ROUNDS;
+
+ shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); /* h1 = hash of D || sigma_0 || h0 || t1 || e1 */
+
+ for (i = 0; i < ROUNDS; i++) {
+ b = (h1[(i >> 3)] >> (i & 7)) & 1; /* bit i of h1 selects which response is revealed */
+ if (b == 0) {
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(sig, r0 + i * N, N);
+ } else if (b == 1) {
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(sig, r1 + i * N, N);
+ }
+ memcpy(sig + NPACKED_BYTES, c + HASH_BYTES * (2 * i + (1 - b)), HASH_BYTES); /* include the round's other commitment, slot 2i + (1 - b) */
+ memcpy(sig + NPACKED_BYTES + HASH_BYTES, rho + (i + b * ROUNDS) * HASH_BYTES, HASH_BYTES); /* rho0 entry when b == 0, rho1 entry when b == 1 */
+ sig += NPACKED_BYTES + 2 * HASH_BYTES;
+ }
+
+ *siglen = SIG_LEN;
+ return 0;
+}
+
+/**
+ * Verifies a detached signature and message under a given public key. Returns 0 on success, -1 if siglen != SIG_LEN or the recomputed sigma_0 differs.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_verify(
+ const uint8_t *sig, size_t siglen,
+ const uint8_t *m, size_t mlen, const uint8_t *pk) {
+
+ gf31 r[N];
+ gf31 t[N];
+ gf31 e[M];
+ signed char F[F_LEN];
+ gf31 pk_gf31[M];
+ // Concatenated for convenient hashing.
+ unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)];
+ unsigned char *D = D_sigma0_h0_sigma1;
+ unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES;
+ unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES;
+ unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES;
+ unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES;
+ unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; /* ROUNDS bits, rounded up to whole bytes */
+ unsigned char c[HASH_BYTES * ROUNDS * 2]; /* two commitment slots per round */
+ memset(c, 0, HASH_BYTES * 2); /* NOTE(review): zeroes only the first round's two slots; the loop below writes both slots of every round — presumably present to placate static analysis, confirm */
+ gf31 x[N];
+ gf31 y[M];
+ gf31 z[M];
+ unsigned char packbuf0[NPACKED_BYTES];
+ unsigned char packbuf1[MPACKED_BYTES];
+ shake256ctx shakestate;
+ unsigned char shakeblock[SHAKE256_RATE];
+ int i, j;
+ gf31 alpha;
+ int alpha_count = 0;
+ int b;
+ shake256incctx state;
+
+ if (siglen != SIG_LEN) { /* signatures have exactly one valid length */
+ return -1;
+ }
+
+ shake256_inc_init(&state);
+ shake256_inc_absorb(&state, pk, PK_BYTES);
+ shake256_inc_absorb(&state, sig, HASH_BYTES);
+ shake256_inc_absorb(&state, m, mlen);
+ shake256_inc_finalize(&state);
+ shake256_inc_squeeze(D, HASH_BYTES, &state); /* D = hash of pk || R || m, where R is the first HASH_BYTES of sig */
+ shake256_inc_ctx_release(&state);
+
+ sig += HASH_BYTES; /* skip R */
+
+ PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); /* expand F from the seed at the start of pk */
+ pk += SEED_BYTES;
+ PQCLEAN_MQDSS64_AVX2_gf31_nunpack(pk_gf31, pk, M);
+
+ memcpy(sigma0, sig, HASH_BYTES);
+
+ shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); /* absorbs D || sigma_0; the squeezed stream supplies h0 and the alpha values */
+ shake256_squeezeblocks(shakeblock, 1, &shakestate);
+
+ memcpy(h0, shakeblock, HASH_BYTES);
+
+ sig += HASH_BYTES; /* skip sigma_0 */
+
+ memcpy(t1packed, sig, ROUNDS * NPACKED_BYTES);
+ sig += ROUNDS * NPACKED_BYTES;
+ memcpy(e1packed, sig, ROUNDS * MPACKED_BYTES);
+ sig += ROUNDS * MPACKED_BYTES;
+
+ shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); /* h1 = hash of D || sigma_0 || h0 || t1 || e1 */
+
+ for (i = 0; i < ROUNDS; i++) {
+ do {
+ alpha = shakeblock[alpha_count] & 31; /* 5-bit candidate from the next stream byte */
+ alpha_count++;
+ if (alpha_count == SHAKE256_RATE) { /* block exhausted: squeeze a fresh one */
+ alpha_count = 0;
+ shake256_squeezeblocks(shakeblock, 1, &shakestate);
+ }
+ } while (alpha == 31); /* reject 31 so that alpha lies in [0, 30] */
+ b = (h1[(i >> 3)] >> (i & 7)) & 1; /* bit i of h1 */
+
+ PQCLEAN_MQDSS64_AVX2_gf31_nunpack(r, sig, N); /* revealed response for this round */
+ PQCLEAN_MQDSS64_AVX2_gf31_nunpack(t, t1packed + NPACKED_BYTES * i, N);
+ PQCLEAN_MQDSS64_AVX2_gf31_nunpack(e, e1packed + MPACKED_BYTES * i, M);
+
+ if (b == 0) { /* recompute the com_0 commitment into slot 2i */
+ PQCLEAN_MQDSS64_AVX2_MQ(y, r, F);
+ for (j = 0; j < N; j++) {
+ x[j] = (gf31)(alpha * r[j] - t[j] + 31); /* x = alpha * r - t */
+ }
+ for (j = 0; j < N; j++) {
+ y[j] = (gf31)(alpha * y[j] - e[j] + 31); /* y = alpha * F(r) - e */
+ }
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(x, x);
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(y, y);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, x, N);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, y, M);
+ com_0(c + HASH_BYTES * (2 * i + 0), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0, packbuf1); /* rho is read from the signature, after the packed response and the other commitment */
+ } else { /* recompute the com_1 commitment into slot 2i + 1 */
+ PQCLEAN_MQDSS64_AVX2_MQ(y, r, F);
+ PQCLEAN_MQDSS64_AVX2_G(z, t, r, F);
+ for (j = 0; j < N; j++) {
+ y[j] = (gf31)(alpha * (31 + pk_gf31[j] - y[j]) - z[j] - e[j] + 62); /* y = alpha * (pk - F(r)) - G(t, r) - e, with offsets keeping the value non-negative */
+ }
+ PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(y, y);
+ PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, y, M);
+ com_1(c + HASH_BYTES * (2 * i + 1), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0);
+ }
+ memcpy(c + HASH_BYTES * (2 * i + (1 - b)), sig + NPACKED_BYTES, HASH_BYTES); /* the other slot is copied verbatim from the signature */
+ sig += NPACKED_BYTES + 2 * HASH_BYTES;
+ }
+ shake256_ctx_release(&shakestate);
+
+ H(c, c, HASH_BYTES * ROUNDS * 2); /* hash all commitments; the digest is written to the start of c */
+ if (memcmp(c, sigma0, HASH_BYTES) != 0) { /* must equal the sigma_0 carried in the signature */
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Writes the signature followed by the message to sm and sets *smlen to their combined length. Always returns 0.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign(
+ uint8_t *sm, size_t *smlen,
+ const uint8_t *m, size_t mlen, const uint8_t *sk) {
+ size_t siglen;
+
+ PQCLEAN_MQDSS64_AVX2_crypto_sign_signature(
+ sm, &siglen, m, mlen, sk); /* return value not checked; the callee always returns 0 */
+
+ memmove(sm + SIG_LEN, m, mlen); /* append the message after the SIG_LEN signature bytes */
+ *smlen = siglen + mlen;
+
+ return 0;
+}
+
+/**
+ * Verifies a given signature-message pair under a given public key. On success copies the message to m, sets *mlen and returns 0; on failure zeroes smlen bytes of m, sets *mlen to 0 and returns -1.
+ */
+int PQCLEAN_MQDSS64_AVX2_crypto_sign_open(
+ uint8_t *m, size_t *mlen,
+ const uint8_t *sm, size_t smlen, const uint8_t *pk) {
+ /* The API caller does not necessarily know what size a signature should be
+ but MQDSS signatures are always exactly SIG_LEN. */
+ if (smlen < SIG_LEN) { /* too short to contain a signature at all */
+ memset(m, 0, smlen);
+ *mlen = 0;
+ return -1;
+ }
+
+ *mlen = smlen - SIG_LEN; /* everything after the signature is the message */
+
+ if (PQCLEAN_MQDSS64_AVX2_crypto_sign_verify(
+ sm, SIG_LEN, sm + SIG_LEN, *mlen, pk)) { /* non-zero means verification failed */
+ memset(m, 0, smlen);
+ *mlen = 0;
+ return -1;
+ }
+
+ /* If verification was successful, move the message to the right place. */
+ memmove(m, sm + SIG_LEN, *mlen);
+
+ return 0;
+}
diff --git a/crypto_sign/mqdss-64/clean/sign.c b/crypto_sign/mqdss-64/clean/sign.c
index 702cbdf8..16bbe254 100644
--- a/crypto_sign/mqdss-64/clean/sign.c
+++ b/crypto_sign/mqdss-64/clean/sign.c
@@ -1,4 +1,3 @@
-#include
#include
#include
#include
diff --git a/test/duplicate_consistency/mqdss-48_clean.yml b/test/duplicate_consistency/mqdss-48_clean.yml
new file mode 100644
index 00000000..a5d2e758
--- /dev/null
+++ b/test/duplicate_consistency/mqdss-48_clean.yml
@@ -0,0 +1,20 @@
+consistency_checks:
+- source:
+ scheme: mqdss-48
+ implementation: avx2
+ files:
+ - api.h
+ - mq.h
+ - LICENSE
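+ - mq.h # NOTE(review): mq.h is already listed two entries above; confirm whether a different file was intended here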
+ - sign.c
+ - params.h
+- source:
+ scheme: mqdss-64
+ implementation: clean
+ files:
+ - gf31.c
+ - gf31.h
+ - LICENSE
+ - mq.c
+ - mq.h
diff --git a/test/duplicate_consistency/mqdss-64_clean.yml b/test/duplicate_consistency/mqdss-64_clean.yml
index ff84f477..79021ca0 100644
--- a/test/duplicate_consistency/mqdss-64_clean.yml
+++ b/test/duplicate_consistency/mqdss-64_clean.yml
@@ -9,3 +9,14 @@ consistency_checks:
- mq.c
- mq.h
- sign.c
+- source:
+ scheme: mqdss-64
+ implementation: avx2
+ files:
+ - api.h
+ - mq.h
+ - LICENSE
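+ - mq.h # NOTE(review): mq.h is already listed two entries above; confirm whether a different file was intended here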
+ - sign.c
+ - params.h
+
diff --git a/test/test_testvectors.py b/test/test_testvectors.py
index a6bc0855..37e46133 100644
--- a/test/test_testvectors.py
+++ b/test/test_testvectors.py
@@ -40,6 +40,7 @@ def test_testvectors(implementation, impl_path, test_dir, init, destr):
implementation.name,
'.exe' if os.name == 'nt' else ''
))],
+ print_output=False,
).replace('\r', '')
assert(implementation.scheme.metadata()['testvectors-sha256'].lower()
== hashlib.sha256(out.encode('utf-8')).hexdigest().lower())