* Add AVX2 version of mqdss * Fix duplicate consistencykyber
@@ -16,3 +16,12 @@ auxiliary-submitters: | |||
implementations: | |||
- name: clean | |||
version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 | |||
- name: avx2 | |||
version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 | |||
supported_platforms: | |||
- architecture: x86_64 | |||
required_flags: | |||
- avx2 | |||
- architecture: x86 | |||
required_flags: | |||
- avx2 |
@@ -0,0 +1,116 @@ | |||
CC0 1.0 Universal | |||
Statement of Purpose | |||
The laws of most jurisdictions throughout the world automatically confer | |||
exclusive Copyright and Related Rights (defined below) upon the creator and | |||
subsequent owner(s) (each and all, an "owner") of an original work of | |||
authorship and/or a database (each, a "Work"). | |||
Certain owners wish to permanently relinquish those rights to a Work for the | |||
purpose of contributing to a commons of creative, cultural and scientific | |||
works ("Commons") that the public can reliably and without fear of later | |||
claims of infringement build upon, modify, incorporate in other works, reuse | |||
and redistribute as freely as possible in any form whatsoever and for any | |||
purposes, including without limitation commercial purposes. These owners may | |||
contribute to the Commons to promote the ideal of a free culture and the | |||
further production of creative, cultural and scientific works, or to gain | |||
reputation or greater distribution for their Work in part through the use and | |||
efforts of others. | |||
For these and/or other purposes and motivations, and without any expectation | |||
of additional consideration or compensation, the person associating CC0 with a | |||
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright | |||
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work | |||
and publicly distribute the Work under its terms, with knowledge of his or her | |||
Copyright and Related Rights in the Work and the meaning and intended legal | |||
effect of CC0 on those rights. | |||
1. Copyright and Related Rights. A Work made available under CC0 may be | |||
protected by copyright and related or neighboring rights ("Copyright and | |||
Related Rights"). Copyright and Related Rights include, but are not limited | |||
to, the following: | |||
i. the right to reproduce, adapt, distribute, perform, display, communicate, | |||
and translate a Work; | |||
ii. moral rights retained by the original author(s) and/or performer(s); | |||
iii. publicity and privacy rights pertaining to a person's image or likeness | |||
depicted in a Work; | |||
iv. rights protecting against unfair competition in regards to a Work, | |||
subject to the limitations in paragraph 4(a), below; | |||
v. rights protecting the extraction, dissemination, use and reuse of data in | |||
a Work; | |||
vi. database rights (such as those arising under Directive 96/9/EC of the | |||
European Parliament and of the Council of 11 March 1996 on the legal | |||
protection of databases, and under any national implementation thereof, | |||
including any amended or successor version of such directive); and | |||
vii. other similar, equivalent or corresponding rights throughout the world | |||
based on applicable law or treaty, and any national implementations thereof. | |||
2. Waiver. To the greatest extent permitted by, but not in contravention of, | |||
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and | |||
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright | |||
and Related Rights and associated claims and causes of action, whether now | |||
known or unknown (including existing as well as future claims and causes of | |||
action), in the Work (i) in all territories worldwide, (ii) for the maximum | |||
duration provided by applicable law or treaty (including future time | |||
extensions), (iii) in any current or future medium and for any number of | |||
copies, and (iv) for any purpose whatsoever, including without limitation | |||
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes | |||
the Waiver for the benefit of each member of the public at large and to the | |||
detriment of Affirmer's heirs and successors, fully intending that such Waiver | |||
shall not be subject to revocation, rescission, cancellation, termination, or | |||
any other legal or equitable action to disrupt the quiet enjoyment of the Work | |||
by the public as contemplated by Affirmer's express Statement of Purpose. | |||
3. Public License Fallback. Should any part of the Waiver for any reason be | |||
judged legally invalid or ineffective under applicable law, then the Waiver | |||
shall be preserved to the maximum extent permitted taking into account | |||
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver | |||
is so judged Affirmer hereby grants to each affected person a royalty-free, | |||
non transferable, non sublicensable, non exclusive, irrevocable and | |||
unconditional license to exercise Affirmer's Copyright and Related Rights in | |||
the Work (i) in all territories worldwide, (ii) for the maximum duration | |||
provided by applicable law or treaty (including future time extensions), (iii) | |||
in any current or future medium and for any number of copies, and (iv) for any | |||
purpose whatsoever, including without limitation commercial, advertising or | |||
promotional purposes (the "License"). The License shall be deemed effective as | |||
of the date CC0 was applied by Affirmer to the Work. Should any part of the | |||
License for any reason be judged legally invalid or ineffective under | |||
applicable law, such partial invalidity or ineffectiveness shall not | |||
invalidate the remainder of the License, and in such case Affirmer hereby | |||
affirms that he or she will not (i) exercise any of his or her remaining | |||
Copyright and Related Rights in the Work or (ii) assert any associated claims | |||
and causes of action with respect to the Work, in either case contrary to | |||
Affirmer's express Statement of Purpose. | |||
4. Limitations and Disclaimers. | |||
a. No trademark or patent rights held by Affirmer are waived, abandoned, | |||
surrendered, licensed or otherwise affected by this document. | |||
b. Affirmer offers the Work as-is and makes no representations or warranties | |||
of any kind concerning the Work, express, implied, statutory or otherwise, | |||
including without limitation warranties of title, merchantability, fitness | |||
for a particular purpose, non infringement, or the absence of latent or | |||
other defects, accuracy, or the present or absence of errors, whether or not | |||
discoverable, all to the greatest extent permissible under applicable law. | |||
c. Affirmer disclaims responsibility for clearing rights of other persons | |||
that may apply to the Work or any use thereof, including without limitation | |||
any person's Copyright and Related Rights in the Work. Further, Affirmer | |||
disclaims responsibility for obtaining any necessary consents, permissions | |||
or other rights required for any use of the Work. | |||
d. Affirmer understands and acknowledges that Creative Commons is not a | |||
party to this document and has no duty or obligation with respect to this | |||
CC0 or use of the Work. | |||
For more information, please see | |||
<http://creativecommons.org/publicdomain/zero/1.0/> |
@@ -0,0 +1,22 @@ | |||
# This Makefile can be used with GNU Make or BSD Make

LIB=libmqdss-48_avx2.a

HEADERS = params.h gf31.h mq.h api.h
OBJECTS = gf31.o mq.o sign.o

# -mavx2 is needed because gf31.c and mq.c use AVX2 intrinsics.
CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror \
       -Wmissing-prototypes -Wredundant-decls -std=c99 -mavx2 \
       -I../../../common $(EXTRAFLAGS)

# Neither target names a file, so never treat them as up to date.
.PHONY: all clean

all: $(LIB)

# Every object is rebuilt when any of the listed headers changes.
%.o: %.c $(HEADERS)
	$(CC) $(CFLAGS) -c -o $@ $<

$(LIB): $(OBJECTS)
	$(AR) -r $@ $(OBJECTS)

clean:
	$(RM) $(OBJECTS)
	$(RM) $(LIB)
@@ -0,0 +1,19 @@ | |||
# This Makefile can be used with Microsoft Visual Studio's nmake using the command:
#    nmake /f Makefile.Microsoft_nmake

LIBRARY=libmqdss-48_avx2.lib
OBJECTS=gf31.obj mq.obj sign.obj

# /arch:AVX2 is needed because gf31.c and mq.c use AVX2 intrinsics.
CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /arch:AVX2

all: $(LIBRARY)

# Make sure objects are recompiled if headers change.
$(OBJECTS): *.h

# Command lines must be indented for nmake to treat them as commands.
$(LIBRARY): $(OBJECTS)
    LIB.EXE /NOLOGO /WX /OUT:$@ $**

# The leading '-' lets nmake continue when a file is already gone.
clean:
    -DEL $(OBJECTS)
    -DEL $(LIBRARY)
@@ -0,0 +1,47 @@ | |||
#ifndef PQCLEAN_MQDSS48_AVX2_API_H
#define PQCLEAN_MQDSS48_AVX2_API_H

#include <stddef.h>
#include <stdint.h>

#define PQCLEAN_MQDSS48_AVX2_CRYPTO_ALGNAME "MQDSS-48"

/* Sizes in bytes of the secret key, the public key and a detached signature. */
#define PQCLEAN_MQDSS48_AVX2_CRYPTO_SECRETKEYBYTES 16
#define PQCLEAN_MQDSS48_AVX2_CRYPTO_PUBLICKEYBYTES 46
#define PQCLEAN_MQDSS48_AVX2_CRYPTO_BYTES 28400

/*
 * NOTE(review): only the key generation routine is fully visible next to this
 * header; it always returns 0. The return convention of the other functions
 * should be confirmed against sign.c.
 */

/**
 * Generates an MQDSS key pair.
 *
 * @param pk  receives the public key (CRYPTO_PUBLICKEYBYTES bytes)
 * @param sk  receives the secret key (CRYPTO_SECRETKEYBYTES bytes)
 * @return 0
 */
int PQCLEAN_MQDSS48_AVX2_crypto_sign_keypair(
    uint8_t *pk, uint8_t *sk);

/**
 * Computes a detached signature over the mlen-byte message m under the
 * secret key sk and writes it to sig.
 *
 * @param sig     output buffer for the signature
 * @param siglen  output: length of the signature (presumably in bytes)
 */
int PQCLEAN_MQDSS48_AVX2_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/**
 * Verifies a detached signature and message under a given public key.
 */
int PQCLEAN_MQDSS48_AVX2_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk);

/**
 * Writes the signature followed by the message to sm.
 *
 * @param sm     output buffer for signature || message
 * @param smlen  output: length of the data written to sm
 */
int PQCLEAN_MQDSS48_AVX2_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/**
 * Verifies a given signature-message pair under a given public key.
 *
 * @param m     output buffer for the recovered message
 * @param mlen  output: length of the recovered message
 */
int PQCLEAN_MQDSS48_AVX2_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif
@@ -0,0 +1,123 @@ | |||
#include "params.h" | |||
#include "fips202.h" | |||
#include "gf31.h" | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
/* Given a vector of N elements in the range [0, 31], this reduces the elements | |||
to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */ | |||
void PQCLEAN_MQDSS48_AVX2_vgf31_unique(gf31 *out, gf31 *in) { | |||
__m256i x; | |||
__m256i _w31 = _mm256_set1_epi16(31); | |||
int i; | |||
for (i = 0; i < (N >> 4); ++i) { | |||
x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); | |||
x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); | |||
_mm256_storeu_si256((__m256i *)(out + i * 16), x); | |||
} | |||
} | |||
/* This function acts on vectors with 64 gf31 elements. | |||
It performs one reduction step and guarantees output in [0, 30], | |||
but requires input to be in [0, 32768). */ | |||
void PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in) { | |||
__m256i x; | |||
__m256i _w2114 = _mm256_set1_epi32(2114 * 65536 + 2114); | |||
__m256i _w31 = _mm256_set1_epi16(31); | |||
int i; | |||
for (i = 0; i < (N >> 4); ++i) { | |||
x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); | |||
x = _mm256_sub_epi16(x, _mm256_mullo_epi16(_w31, _mm256_mulhi_epi16(x, _w2114))); | |||
x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); | |||
_mm256_storeu_si256((__m256i *)(out + i * 16), x); | |||
} | |||
} | |||
/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places | |||
them in a vector of 16-bit elements */ | |||
void PQCLEAN_MQDSS48_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen) { | |||
size_t i = 0, j; | |||
shake256ctx shakestate; | |||
uint8_t shakeblock[SHAKE256_RATE]; | |||
shake256_absorb(&shakestate, seed, seedlen); | |||
while (i < len) { | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
for (j = 0; j < SHAKE256_RATE && i < len; j++) { | |||
if ((shakeblock[j] & 31) != 31) { | |||
out[i] = (shakeblock[j] & 31); | |||
i++; | |||
} | |||
} | |||
} | |||
shake256_ctx_release(&shakestate); | |||
} | |||
/* Given a seed, samples len gf31 elements, transposed into unsigned range, | |||
i.e. in the range [-15, 15], and places them in an array of 8-bit integers. | |||
This is used for the expansion of F, which wants packed elements. */ | |||
void PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen) { | |||
size_t i = 0, j; | |||
shake256ctx shakestate; | |||
uint8_t shakeblock[SHAKE256_RATE]; | |||
shake256_absorb(&shakestate, seed, seedlen); | |||
while (i < len) { | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
for (j = 0; j < SHAKE256_RATE && i < len; j++) { | |||
if ((shakeblock[j] & 31) != 31) { | |||
out[i] = (signed char)((shakeblock[j] & 31) - 15); | |||
i++; | |||
} | |||
} | |||
} | |||
shake256_ctx_release(&shakestate); | |||
} | |||
/* Unpacks an array of packed GF31 elements to one element per gf31. | |||
Assumes that there is sufficient empty space available at the end of the | |||
array to unpack. Can perform in-place. */ | |||
void PQCLEAN_MQDSS48_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n) { | |||
size_t i; | |||
size_t j = ((n * 5) >> 3) - 1; | |||
unsigned int d = 0; | |||
for (i = n; i > 0; i--) { | |||
out[i - 1] = (gf31)((in[j] >> d) & 31); | |||
d += 5; | |||
if (d > 8) { | |||
d -= 8; | |||
j--; | |||
out[i - 1] = (gf31)(out[i - 1] ^ ((in[j] << (5 - d)) & 31)); | |||
} | |||
} | |||
} | |||
/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values. | |||
Assumes that there is sufficient space available to unpack. | |||
Can perform in-place. */ | |||
void PQCLEAN_MQDSS48_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n) { | |||
unsigned int i = 0; | |||
unsigned int j; | |||
int d = 3; | |||
/* There will be ceil(5n / 8) output blocks */ | |||
memset(out, 0, (size_t)((5 * n + 7) & ~7U) >> 3); | |||
for (j = 0; j < n; j++) { | |||
if (d < 0) { | |||
d += 8; | |||
out[i] = (uint8_t)((out[i] & (255 << (d - 3))) | | |||
((in[j] >> (8 - d)) & ~(255 << (d - 3)))); | |||
i++; | |||
} | |||
out[i] = (uint8_t)((out[i] & ~(31 << d)) | ((in[j] << d) & (31 << d))); | |||
d -= 5; | |||
} | |||
} |
@@ -0,0 +1,36 @@ | |||
#ifndef MQDSS_GF31_H
#define MQDSS_GF31_H

#include <stddef.h>
#include <stdint.h>

/* One GF(31) element, stored in an unsigned short (the AVX2 code handles
   these as 16-bit lanes). */
typedef unsigned short gf31;

/* Given a vector of elements in the range [0, 31], this reduces the elements
   to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */
void PQCLEAN_MQDSS48_AVX2_vgf31_unique(gf31 *out, gf31 *in);

/* Given a vector of 16-bit integers in [0, 32768), this performs one
   reduction step and brings the elements into the range [0, 30]
   (i.e reduction mod 31) */
void PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in);

/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places
   them in a vector of 16-bit elements */
void PQCLEAN_MQDSS48_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen);

/* Given a seed, samples len gf31 elements, shifted into signed range,
   i.e. in the range [-15, 15], and places them in an array of 8-bit integers.
   This is used for the expansion of F, which wants packed elements. */
void PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen);

/* Unpacks an array of packed GF31 elements to one element per gf31.
   Assumes that there is sufficient empty space available at the end of the
   array to unpack. Can perform in-place. */
void PQCLEAN_MQDSS48_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n);

/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values.
   Assumes that there is sufficient space available to pack into
   (ceil(5n / 8) bytes). Callers should pass non-overlapping buffers. */
void PQCLEAN_MQDSS48_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n);

#endif
@@ -0,0 +1,251 @@ | |||
#include "mq.h" | |||
#include "params.h" | |||
#include <immintrin.h> | |||
#include <stdio.h> | |||
/* One reduction step on sixteen 16-bit lanes: takes the high 16 bits of the
   signed product r * _w2114 as a quotient estimate and subtracts
   _w31 * estimate from r. The callers in this file pass 31 and 2114. */
static inline __m256i reduce_16(__m256i r, __m256i _w31, __m256i _w2114) {
    const __m256i estimate = _mm256_mulhi_epi16(r, _w2114);
    const __m256i multiple = _mm256_mullo_epi16(_w31, estimate);

    return _mm256_sub_epi16(r, multiple);
}
/* Computes all products x_i * x_j, returns in reduced form */ | |||
/* Output layout produced below: one byte per product, row i (products of
   x_i with x_0..x_i) starting at byte offset k, where k grows by i + 1 after
   each row. Every store writes whole 32-byte vectors, so a row spills past
   its i + 1 bytes and the spill is overwritten by the following row. The last
   row (i = N - 1) still writes 64 bytes at its offset, so xij must have room
   beyond the N * (N + 1) / 2 packed bytes. */
inline static | |||
void generate_quadratic_terms( unsigned char *xij, const gf31 *x ) { | |||
__m256i mask_2114 = _mm256_set1_epi16( 2114 ); | |||
__m256i mask_31 = _mm256_set1_epi16( 31 ); | |||
// x[0..47] as three vectors of sixteen 16-bit lanes; xi[3] is a zero filler.
__m256i xi[4]; | |||
xi[0] = _mm256_loadu_si256((__m256i const *) (x)); | |||
xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); | |||
xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); | |||
xi[3] = _mm256_setzero_si256(); | |||
// Product vectors; entries not yet reached by the inner loops stay zero.
__m256i xixj[4]; | |||
xixj[0] = _mm256_setzero_si256(); | |||
xixj[1] = _mm256_setzero_si256(); | |||
xixj[2] = _mm256_setzero_si256(); | |||
xixj[3] = _mm256_setzero_si256(); | |||
int k = 0; | |||
// Rows 0..31: at most two product vectors are needed, one 32-byte store per row.
for (int i = 0; i < 32; i++) { | |||
__m256i br_xi = _mm256_set1_epi16( (short)x[i] ); | |||
// Only the vectors up to the one containing index i are recomputed.
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
// Narrow the 16-bit lanes to bytes (signed saturation). The pack works per
// 128-bit half, so the permute puts the 64-bit quarters back in element order.
__m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r ); | |||
k += i + 1; | |||
} | |||
// Rows 32..N-1: three product vectors are live, stored as two 32-byte halves.
for (int i = 32; i < N; i++) { | |||
__m256i br_xi = _mm256_set1_epi16( (short)x[i] ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
__m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); | |||
// xixj[3] is never written by the loops above, so the upper 16 bytes are zero.
__m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); | |||
r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); | |||
k += i + 1; | |||
} | |||
} | |||
/* Computes all terms (x_i * y_j) + (x_j * y_i), returns in reduced form */ | |||
/* Uses the same row layout and overlapping 32-byte stores as
   generate_quadratic_terms: row i starts at offset k, k grows by i + 1 per
   row, and the last rows write 64 bytes, so xij needs slack past the packed
   N * (N + 1) / 2 bytes. */
inline static | |||
void generate_xiyj_p_xjyi_terms( unsigned char *xij, const gf31 *x, const gf31 *y ) { | |||
__m256i mask_2114 = _mm256_set1_epi16( 2114 ); | |||
__m256i mask_31 = _mm256_set1_epi16( 31 ); | |||
// Each 16-bit lane is built as x_j in the low byte and y_j in the high byte:
// the one-byte left shift moves y's low byte into the high byte of its lane.
// NOTE(review): assumes every x and y element fits in one byte - confirm for callers.
__m256i xiyi[4]; | |||
xiyi[0] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y)), 1 )); | |||
xiyi[1] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 16)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 16)), 1 )); | |||
xiyi[2] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 32)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 32)), 1 )); | |||
xiyi[3] = _mm256_setzero_si256(); | |||
// Result vectors; entries not yet reached by the inner loops stay zero.
__m256i xixj[4]; | |||
xixj[0] = _mm256_setzero_si256(); | |||
xixj[1] = _mm256_setzero_si256(); | |||
xixj[2] = _mm256_setzero_si256(); | |||
xixj[3] = _mm256_setzero_si256(); | |||
int k = 0; | |||
// Rows 0..31: one 32-byte store per row.
for (int i = 0; i < 32; i++) { | |||
// Broadcast word holds y_i in the low byte and x_i in the high byte, so the
// byte-wise multiply-add below gives x_j * y_i + y_j * x_i in each lane.
__m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
// Narrow to bytes and restore element order after the per-half pack.
__m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r ); | |||
k += i + 1; | |||
} | |||
// Rows 32..N-1: three result vectors are live, stored as two 32-byte halves.
for (int i = 32; i < N; i++) { | |||
__m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
__m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); | |||
__m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); | |||
r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); | |||
k += i + 1; | |||
} | |||
} | |||
/* EVAL_YMM_0(xx): consumes the low 128 bits of xx, one 16-bit word (two
   packed bytes) at a time, for 8 steps. Each step broadcasts the word and,
   for each of the N / 16 accumulators, loads 32 coefficient bytes, advances F
   by 32 and adds the byte-wise multiply-add result to yy[...].
   The macro reads and modifies the variables F and yy of the scope in which
   it is expanded. */
#define EVAL_YMM_0(xx) {\ | |||
__m128i tmp = _mm256_castsi256_si128(xx); \ | |||
for (int macro_i = 0; macro_i < 8; macro_i++) { \ | |||
__m256i _xi = _mm256_broadcastw_epi16(tmp); \ | |||
tmp = _mm_srli_si128(tmp, 2); \ | |||
for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ | |||
__m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ | |||
F += 32; \ | |||
yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ | |||
} \ | |||
} \ | |||
} | |||
/* EVAL_YMM_1(xx): same as EVAL_YMM_0, but consumes the high 128 bits of xx. */
#define EVAL_YMM_1(xx) {\ | |||
__m128i tmp = _mm256_extracti128_si256(xx, 1); \ | |||
for (int macro_i = 0; macro_i < 8; macro_i++) { \ | |||
__m256i _xi = _mm256_broadcastw_epi16(tmp); \ | |||
tmp = _mm_srli_si128(tmp, 2); \ | |||
for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ | |||
__m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ | |||
F += 32; \ | |||
yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ | |||
} \ | |||
} \ | |||
} | |||
/* REDUCE_(yy): applies reduce_16 to the first three accumulators. Uses the
   variables mask_reduce and mask_2114 of the scope in which it is expanded. */
#define REDUCE_(yy) { \ | |||
(yy)[0] = reduce_16((yy)[0], mask_reduce, mask_2114); \ | |||
(yy)[1] = reduce_16((yy)[1], mask_reduce, mask_2114); \ | |||
(yy)[2] = reduce_16((yy)[2], mask_reduce, mask_2114); \ | |||
} | |||
/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be
   in reduced 5-bit representation). Expects the coefficients in F to be in
   signed representation (i.e. [-15, 15], packed bytewise); F is read
   sequentially, 32 bytes per load, linear coefficients first and the
   coefficients of the quadratic terms after them.
   Outputs M gf31 elements in unique 16-bit representation as fx.
   NOTE(review): the key generation code in sign.c still passes fx through
   PQCLEAN_MQDSS48_AVX2_vgf31_unique, so the result may still contain the
   value 31 - confirm before relying on "unique". */ | |||
void PQCLEAN_MQDSS48_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F) { | |||
__m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); | |||
// All-ones shifted right by 11 leaves 31 in every 16-bit lane.
__m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); | |||
__m256i xi[4]; | |||
xi[0] = _mm256_loadu_si256((__m256i const *) (x)); | |||
xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); | |||
xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); | |||
xi[3] = _mm256_setzero_si256(); | |||
__m256i _zero = _mm256_setzero_si256(); | |||
// Add 31 to every lane that is negative when read as a signed 16-bit value.
xi[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[0])), xi[0]); | |||
xi[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[1])), xi[1]); | |||
xi[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[2])), xi[2]); | |||
// Pack x into bytes: x1 holds elements 0..31, x2 holds 32..47 followed by zeros.
__m256i x1 = _mm256_packs_epi16(xi[0], xi[1]); | |||
x1 = _mm256_permute4x64_epi64(x1, 0xd8); // 3,1,2,0 | |||
__m256i x2 = _mm256_packs_epi16(xi[2], xi[3]); | |||
x2 = _mm256_permute4x64_epi64(x2, 0xd8); // 3,1,2,0 | |||
// Accumulators for the M outputs, sixteen 16-bit lanes each.
__m256i yy[M / 16]; | |||
yy[0] = _zero; | |||
yy[1] = _zero; | |||
yy[2] = _zero; | |||
// Linear terms: the 48 packed bytes of x, two bytes per macro step.
EVAL_YMM_0(x1) | |||
EVAL_YMM_1(x1) | |||
EVAL_YMM_0(x2) | |||
REDUCE_(yy) | |||
// Quadratic terms. The buffer is larger than the N * (N + 1) / 2 = 1176 packed
// bytes because generate_quadratic_terms stores whole 32-byte vectors.
__m256i xixj[38]; | |||
generate_quadratic_terms( (unsigned char *) xixj, x ); | |||
// Vectors 0..35, reducing the accumulators after every two vectors.
for (int i = 0 ; i < 36 ; i += 2) { | |||
EVAL_YMM_0(xixj[i]) | |||
EVAL_YMM_1(xixj[i]) | |||
EVAL_YMM_0(xixj[i + 1]) | |||
EVAL_YMM_1(xixj[i + 1]) | |||
REDUCE_(yy) | |||
} | |||
// Tail: the low half of vector 36 through the macro, then only four more
// words (8 bytes) of its high half; the brace block below is that partial pass.
EVAL_YMM_0(xixj[36]) { | |||
__m128i tmp = _mm256_extracti128_si256(xixj[36], 1); | |||
for (int i = 0; i < 4; i++) { | |||
__m256i _xi = _mm256_broadcastw_epi16(tmp); | |||
tmp = _mm_srli_si128(tmp, 2); | |||
for (int j = 0; j < (N / 16); j++) { | |||
__m256i coeff = _mm256_loadu_si256((__m256i const *) F); | |||
F += 32; | |||
yy[j] = _mm256_add_epi16(yy[j], _mm256_maddubs_epi16(_xi, coeff)); | |||
} | |||
} | |||
} | |||
REDUCE_(yy) | |||
// Add 31 to negative lanes so that the stored values are non-negative.
yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); | |||
yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); | |||
yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); | |||
// The loop bound uses N although yy has M / 16 entries; params.h defines M as N.
for (int i = 0; i < (N / 16); ++i) { | |||
_mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); | |||
} | |||
} | |||
/* Evaluates the bilinear polar form of the MQ function (i.e. G) on two vectors
   of N gf31 elements, x and y (expected to be in reduced 5-bit
   representation). Expects the coefficients in F to be in signed
   representation (i.e. [-15, 15], packed bytewise); only the coefficients
   after the first N * M bytes of F are used.
   Outputs M gf31 elements in unique 16-bit representation as fx. */ | |||
void PQCLEAN_MQDSS48_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F) { | |||
__m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); | |||
// All-ones shifted right by 11 leaves 31 in every 16-bit lane.
__m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); | |||
__m256i _zero = _mm256_setzero_si256(); | |||
// Accumulators for the M outputs, sixteen 16-bit lanes each.
__m256i yy[(M / 16)]; | |||
yy[0] = _zero; | |||
yy[1] = _zero; | |||
yy[2] = _zero; | |||
// Skip the N * M coefficient bytes that PQCLEAN_MQDSS48_AVX2_MQ consumes for its linear terms.
F += N * M; | |||
// The buffer is larger than the N * (N + 1) / 2 packed bytes because the
// generator stores whole 32-byte vectors.
__m256i xixj[38]; | |||
generate_xiyj_p_xjyi_terms( (unsigned char *) xixj, x, y ); | |||
// Vectors 0..35, reducing the accumulators after every two vectors.
for (int i = 0 ; i < 36 ; i += 2) { | |||
EVAL_YMM_0(xixj[i]) | |||
EVAL_YMM_1(xixj[i]) | |||
EVAL_YMM_0(xixj[i + 1]) | |||
EVAL_YMM_1(xixj[i + 1]) | |||
REDUCE_(yy) | |||
} | |||
// Tail: the low half of vector 36 through the macro, then only four more
// words (8 bytes) of its high half; the brace block below is that partial pass.
EVAL_YMM_0(xixj[36]) { | |||
__m128i tmp = _mm256_extracti128_si256(xixj[36], 1); | |||
for (int i = 0; i < 4; i++) { | |||
__m256i _xi = _mm256_broadcastw_epi16(tmp); | |||
tmp = _mm_srli_si128(tmp, 2); | |||
for (int j = 0; j < (N / 16); j++) { | |||
__m256i coeff = _mm256_loadu_si256((__m256i const *) F); | |||
F += 32; | |||
yy[j] = _mm256_add_epi16(yy[j], _mm256_maddubs_epi16(_xi, coeff)); | |||
} | |||
} | |||
} | |||
REDUCE_(yy) | |||
// Add 31 to negative lanes so that the stored values are non-negative.
yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); | |||
yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); | |||
yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); | |||
// The loop bound uses N although yy has M / 16 entries; params.h defines M as N.
for (int i = 0; i < (N / 16); ++i) { | |||
_mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); | |||
} | |||
} |
@@ -0,0 +1,18 @@ | |||
#ifndef MQDSS_MQ_H
#define MQDSS_MQ_H

#include "gf31.h"

/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be
   in reduced 5-bit representation). Expects the coefficients in F to be in
   signed representation (i.e. [-15, 15], packed bytewise).
   Outputs M gf31 elements in unique 16-bit representation as fx. */
void PQCLEAN_MQDSS48_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F);

/* Evaluates the bilinear polar form of the MQ function (i.e. G) on two vectors
   of N gf31 elements, x and y (expected to be in reduced 5-bit
   representation). Expects the coefficients in F to be in signed
   representation (i.e. [-15, 15], packed bytewise); F is the same coefficient
   array that is passed to PQCLEAN_MQDSS48_AVX2_MQ.
   Outputs M gf31 elements in unique 16-bit representation as fx. */
void PQCLEAN_MQDSS48_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F);

#endif
@@ -0,0 +1,25 @@ | |||
#ifndef MQDSS_PARAMS_H
#define MQDSS_PARAMS_H

/* N: number of variables, M: number of equations of the MQ system. */
#define N 48
#define M N
#define F_LEN (M * (((N * (N + 1)) >> 1) + N)) /* Number of elements in F */

#define ROUNDS 184

/* Number of bytes that N, M and F_LEN elements require when packed into a byte
   array, 5-bit elements packed continuously. */
/* Assumes N and M to be multiples of 8 */
#if (N % 8) != 0 || (M % 8) != 0
#error "N and M must be multiples of 8 for the packed-size macros"
#endif
#define NPACKED_BYTES ((N * 5) >> 3)
#define MPACKED_BYTES ((M * 5) >> 3)
#define FPACKED_BYTES ((F_LEN * 5) >> 3)

#define HASH_BYTES 32
#define SEED_BYTES 16
#define PK_BYTES (SEED_BYTES + MPACKED_BYTES)
#define SK_BYTES SEED_BYTES

// R, sigma_0, ROUNDS * (t1, r{0,1}, e1, c, rho)
#define SIG_LEN (2 * HASH_BYTES + ROUNDS * (2*NPACKED_BYTES + MPACKED_BYTES + HASH_BYTES + HASH_BYTES))

#endif
@@ -0,0 +1,389 @@ | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "fips202.h" | |||
#include "gf31.h" | |||
#include "mq.h" | |||
#include "params.h" | |||
#include "randombytes.h" | |||
/* Takes an array of len bytes and computes a hash digest: the input in is
   hashed with SHAKE256 and HASH_BYTES bytes of output are written to out.
   This is used as a hash function in the Fiat-Shamir transform. */ | |||
static void H(unsigned char *out, const unsigned char *in, const size_t len) { | |||
shake256(out, HASH_BYTES, in, len); | |||
} | |||
/* Takes two arrays of N packed elements and an array of M packed elements, | |||
and computes a HASH_BYTES commitment. */ | |||
static void com_0(unsigned char *c, | |||
const unsigned char *rho, | |||
const unsigned char *inn, const unsigned char *inn2, | |||
const unsigned char *inm) { | |||
unsigned char buffer[HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES]; | |||
memcpy(buffer, rho, HASH_BYTES); | |||
memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); | |||
memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inn2, NPACKED_BYTES); | |||
memcpy(buffer + HASH_BYTES + 2 * NPACKED_BYTES, inm, MPACKED_BYTES); | |||
shake256(c, HASH_BYTES, buffer, HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES); | |||
} | |||
/* Takes an array of N packed elements and an array of M packed elements, | |||
and computes a HASH_BYTES commitment. */ | |||
static void com_1(unsigned char *c, | |||
const unsigned char *rho, | |||
const unsigned char *inn, const unsigned char *inm) { | |||
unsigned char buffer[HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES]; | |||
memcpy(buffer, rho, HASH_BYTES); | |||
memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); | |||
memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inm, MPACKED_BYTES); | |||
shake256(c, HASH_BYTES, buffer, HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES); | |||
} | |||
/* | |||
* Generates an MQDSS key pair. | |||
*/ | |||
int PQCLEAN_MQDSS48_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
signed char F[F_LEN]; | |||
unsigned char skbuf[SEED_BYTES * 2]; | |||
gf31 sk_gf31[N]; | |||
gf31 pk_gf31[M]; | |||
// Expand sk to obtain a seed for F and the secret input s. | |||
// We also expand to obtain a value for sampling r0, t0 and e0 during | |||
// signature generation, but that is not relevant here. | |||
randombytes(sk, SEED_BYTES); | |||
shake256(skbuf, SEED_BYTES * 2, sk, SEED_BYTES); | |||
memcpy(pk, skbuf, SEED_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_MQ(pk_gf31, sk_gf31, F); | |||
PQCLEAN_MQDSS48_AVX2_vgf31_unique(pk_gf31, pk_gf31); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); | |||
return 0; | |||
} | |||
/**
* Computes a detached signature over the message m (mlen bytes) with the
* secret key sk, of which SEED_BYTES bytes are read.
*
* The signature is written to sig in this order: R, sigma_0, the packed t1
* vectors of all rounds, the packed e1 vectors of all rounds, and then per
* round one packed r_b vector, one commitment and one rho_b value.
* *siglen is set to SIG_LEN and the function always returns 0.
*/ | |||
int PQCLEAN_MQDSS48_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
signed char F[F_LEN]; | |||
unsigned char skbuf[SEED_BYTES * 4]; | |||
gf31 pk_gf31[M]; | |||
unsigned char pk[SEED_BYTES + MPACKED_BYTES]; | |||
// Concatenated for convenient hashing. | |||
unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; | |||
unsigned char *D = D_sigma0_h0_sigma1; | |||
unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; | |||
unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; | |||
unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; | |||
unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; | |||
shake256ctx shakestate; | |||
unsigned char shakeblock[SHAKE256_RATE]; | |||
// One challenge bit per round, rounded up to whole bytes.
unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; | |||
unsigned char rnd_seed[HASH_BYTES + SEED_BYTES]; | |||
// Commitment randomness: rho0 values for all rounds, then rho1 values.
unsigned char rho[2 * ROUNDS * HASH_BYTES]; | |||
unsigned char *rho0 = rho; | |||
unsigned char *rho1 = rho + ROUNDS * HASH_BYTES; | |||
gf31 sk_gf31[N]; | |||
gf31 rnd[(2 * N + M) * ROUNDS]; // Concatenated for easy RNG. | |||
gf31 *r0 = rnd; | |||
gf31 *t0 = rnd + N * ROUNDS; | |||
gf31 *e0 = rnd + 2 * N * ROUNDS; | |||
gf31 r1[N * ROUNDS]; | |||
gf31 t1[N * ROUNDS]; | |||
gf31 e1[M * ROUNDS]; | |||
gf31 gx[M * ROUNDS]; | |||
unsigned char packbuf0[NPACKED_BYTES]; | |||
unsigned char packbuf1[NPACKED_BYTES]; | |||
unsigned char packbuf2[MPACKED_BYTES]; | |||
// Two commitments per round: slot 2*i holds com_0, slot 2*i + 1 holds com_1.
unsigned char c[HASH_BYTES * ROUNDS * 2]; | |||
gf31 alpha; | |||
int alpha_count = 0; | |||
int b; | |||
int i, j; | |||
shake256incctx state; | |||
// Expand sk into four SEED_BYTES chunks: chunk 0 seeds F, chunk 1 the secret
// vector, chunk 2 the rho values and chunk 3 the masks r0, t0, e0 (see below).
shake256(skbuf, SEED_BYTES * 4, sk, SEED_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, skbuf, SEED_BYTES); | |||
// R = SHAKE256(sk || m), written as the first HASH_BYTES of the signature.
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, sk, SEED_BYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(sig, HASH_BYTES, &state); // Compute R. | |||
shake256_inc_ctx_release(&state); | |||
// Recompute the public key: the seed for F followed by the packed MQ image
// of the secret vector.
memcpy(pk, skbuf, SEED_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_MQ(pk_gf31, sk_gf31, F); | |||
PQCLEAN_MQDSS48_AVX2_vgf31_unique(pk_gf31, pk_gf31); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); | |||
// D = SHAKE256(pk || R || m).
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, pk, PK_BYTES); | |||
shake256_inc_absorb(&state, sig, HASH_BYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(D, HASH_BYTES, &state); | |||
shake256_inc_ctx_release(&state); | |||
sig += HASH_BYTES; // Compensate for prefixed R. | |||
// rho for all rounds is derived from seed chunk 2 concatenated with D.
memcpy(rnd_seed, skbuf + 2 * SEED_BYTES, SEED_BYTES); | |||
memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); | |||
shake256(rho, 2 * ROUNDS * HASH_BYTES, rnd_seed, SEED_BYTES + HASH_BYTES); | |||
// r0, t0 and e0 for all rounds are sampled from seed chunk 3 concatenated with D.
memcpy(rnd_seed, skbuf + 3 * SEED_BYTES, SEED_BYTES); | |||
memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nrand(rnd, (2 * N + M) * ROUNDS, rnd_seed, SEED_BYTES + HASH_BYTES); | |||
// Per round: r1 = secret - r0 (the added 31 keeps the difference positive
// before the later reduction) and gx = G(t0, r1).
for (i = 0; i < ROUNDS; i++) { | |||
for (j = 0; j < N; j++) { | |||
r1[j + i * N] = (gf31)(31 + sk_gf31[j] - r0[j + i * N]); | |||
} | |||
PQCLEAN_MQDSS48_AVX2_G(gx + i * M, t0 + i * N, r1 + i * N, F); | |||
} | |||
// gx = G(t0, r1) + e0; reduced in the commitment loop below.
for (i = 0; i < ROUNDS * M; i++) { | |||
gx[i] = (gf31)(gx[i] + e0[i]); | |||
} | |||
// Build both commitments of every round:
// c[2i] = com_0(rho0_i, r0, t0, e0) and c[2i + 1] = com_1(rho1_i, r1, gx).
for (i = 0; i < ROUNDS; i++) { | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, r0 + i * N, N); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, t0 + i * N, N); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf2, e0 + i * M, M); | |||
com_0(c + HASH_BYTES * (2 * i + 0), rho0 + i * HASH_BYTES, packbuf0, packbuf1, packbuf2); | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(r1 + i * N, r1 + i * N); | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(gx + i * M, gx + i * M); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, r1 + i * N, N); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, gx + i * M, M); | |||
com_1(c + HASH_BYTES * (2 * i + 1), rho1 + i * HASH_BYTES, packbuf0, packbuf1); | |||
} | |||
H(sigma0, c, HASH_BYTES * ROUNDS * 2); // Compute sigma_0. | |||
// Absorb D || sigma_0. The first HASH_BYTES of the squeezed stream are kept
// as h0; the alpha challenges below are read from the same stream, starting
// again at byte 0 of the first block.
shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
memcpy(h0, shakeblock, HASH_BYTES); | |||
memcpy(sig, sigma0, HASH_BYTES); | |||
sig += HASH_BYTES; // Compensate for sigma_0. | |||
for (i = 0; i < ROUNDS; i++) { | |||
// Rejection-sample alpha from 5-bit values: 31 is discarded, so alpha
// ends up in [0, 30]. A new block is squeezed when the current one is used up.
do { | |||
alpha = shakeblock[alpha_count] & 31; | |||
alpha_count++; | |||
if (alpha_count == SHAKE256_RATE) { | |||
alpha_count = 0; | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
} | |||
} while (alpha == 31); | |||
// t1 = alpha * r0 - t0 (offset by 31 before reduction).
for (j = 0; j < N; j++) { | |||
t1[i * N + j] = (gf31)(alpha * r0[j + i * N] - t0[j + i * N] + 31); | |||
} | |||
// e1 = alpha * MQ(r0) - e0 (offset by 31 before reduction).
PQCLEAN_MQDSS48_AVX2_MQ(e1 + i * M, r0 + i * N, F); | |||
// NOTE(review): e1 and e0 hold M elements per round, yet this loop runs j
// over N and writes at stride i * N while reading at stride i * M (the
// shorten_unique call on e1 below also uses i * N). That only lines up when
// N == M - confirm against params.h.
for (j = 0; j < N; j++) { | |||
e1[i * N + j] = (gf31)(alpha * e1[j + i * M] - e0[j + i * M] + 31); | |||
} | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(t1 + i * N, t1 + i * N); | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(e1 + i * N, e1 + i * N); | |||
} | |||
shake256_ctx_release(&shakestate); | |||
// Pack t1 and e1 of all rounds directly into the hashing buffer, then
// append the same bytes to the signature.
PQCLEAN_MQDSS48_AVX2_gf31_npack(t1packed, t1, N * ROUNDS); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(e1packed, e1, M * ROUNDS); | |||
memcpy(sig, t1packed, NPACKED_BYTES * ROUNDS); | |||
sig += NPACKED_BYTES * ROUNDS; | |||
memcpy(sig, e1packed, MPACKED_BYTES * ROUNDS); | |||
sig += MPACKED_BYTES * ROUNDS; | |||
// h1 = SHAKE256(D || sigma_0 || h0 || t1 || e1); bit i of h1 is the
// second challenge b of round i.
shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); | |||
for (i = 0; i < ROUNDS; i++) { | |||
b = (h1[(i >> 3)] >> (i & 7)) & 1; | |||
// Reveal r_b for this round.
if (b == 0) { | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(sig, r0 + i * N, N); | |||
} else if (b == 1) { | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(sig, r1 + i * N, N); | |||
} | |||
// Append the commitment with index 1 - b (the one not selected by b) and
// the randomness rho_b of the selected one.
memcpy(sig + NPACKED_BYTES, c + HASH_BYTES * (2 * i + (1 - b)), HASH_BYTES); | |||
memcpy(sig + NPACKED_BYTES + HASH_BYTES, rho + (i + b * ROUNDS) * HASH_BYTES, HASH_BYTES); | |||
sig += NPACKED_BYTES + 2 * HASH_BYTES; | |||
} | |||
*siglen = SIG_LEN; | |||
return 0; | |||
} | |||
/**
* Verifies a detached signature sig over the message m (mlen bytes) under
* the public key pk.
*
* Returns -1 when siglen differs from SIG_LEN or when the hash over the
* reconstructed commitments does not match the sigma_0 carried in the
* signature; returns 0 otherwise.
*/ | |||
int PQCLEAN_MQDSS48_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
gf31 r[N]; | |||
gf31 t[N]; | |||
gf31 e[M]; | |||
signed char F[F_LEN]; | |||
gf31 pk_gf31[M]; | |||
// Concatenated for convenient hashing. | |||
unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; | |||
unsigned char *D = D_sigma0_h0_sigma1; | |||
unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; | |||
unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; | |||
unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; | |||
unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; | |||
unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; | |||
unsigned char c[HASH_BYTES * ROUNDS * 2]; | |||
// Only the first two commitment slots are zeroed here; every slot of c is
// written in the round loop below (one via com_0/com_1, one via memcpy).
memset(c, 0, HASH_BYTES * 2); | |||
gf31 x[N]; | |||
gf31 y[M]; | |||
gf31 z[M]; | |||
unsigned char packbuf0[NPACKED_BYTES]; | |||
unsigned char packbuf1[MPACKED_BYTES]; | |||
shake256ctx shakestate; | |||
unsigned char shakeblock[SHAKE256_RATE]; | |||
int i, j; | |||
gf31 alpha; | |||
int alpha_count = 0; | |||
int b; | |||
shake256incctx state; | |||
// Signatures have a fixed length; reject anything else before reading sig.
if (siglen != SIG_LEN) { | |||
return -1; | |||
} | |||
// D = SHAKE256(pk || R || m), where R is the first HASH_BYTES of sig.
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, pk, PK_BYTES); | |||
shake256_inc_absorb(&state, sig, HASH_BYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(D, HASH_BYTES, &state); | |||
shake256_inc_ctx_release(&state); | |||
sig += HASH_BYTES; | |||
// Expand F from the seed at the start of pk, then unpack the M public
// values that follow the seed.
PQCLEAN_MQDSS48_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); | |||
pk += SEED_BYTES; | |||
PQCLEAN_MQDSS48_AVX2_gf31_nunpack(pk_gf31, pk, M); | |||
// Take sigma_0 from the signature and rebuild h0 from D || sigma_0; the
// alpha challenges are read from the same SHAKE256 stream.
memcpy(sigma0, sig, HASH_BYTES); | |||
shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
memcpy(h0, shakeblock, HASH_BYTES); | |||
sig += HASH_BYTES; | |||
// Copy the packed t1 and e1 vectors of all rounds into the hashing buffer.
memcpy(t1packed, sig, ROUNDS * NPACKED_BYTES); | |||
sig += ROUNDS * NPACKED_BYTES; | |||
memcpy(e1packed, sig, ROUNDS * MPACKED_BYTES); | |||
sig += ROUNDS * MPACKED_BYTES; | |||
// h1 = SHAKE256(D || sigma_0 || h0 || t1 || e1); bit i is challenge b of round i.
shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); | |||
for (i = 0; i < ROUNDS; i++) { | |||
// Rejection-sample alpha in [0, 30] from 5-bit values of the stream.
do { | |||
alpha = shakeblock[alpha_count] & 31; | |||
alpha_count++; | |||
if (alpha_count == SHAKE256_RATE) { | |||
alpha_count = 0; | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
} | |||
} while (alpha == 31); | |||
b = (h1[(i >> 3)] >> (i & 7)) & 1; | |||
// r is the revealed r_b of this round; t and e are this round's t1 and e1.
PQCLEAN_MQDSS48_AVX2_gf31_nunpack(r, sig, N); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nunpack(t, t1packed + NPACKED_BYTES * i, N); | |||
PQCLEAN_MQDSS48_AVX2_gf31_nunpack(e, e1packed + MPACKED_BYTES * i, M); | |||
if (b == 0) { | |||
// Recompute commitment 0 from r: x = alpha * r - t and
// y = alpha * MQ(r) - e, each offset by 31 before reduction.
PQCLEAN_MQDSS48_AVX2_MQ(y, r, F); | |||
for (j = 0; j < N; j++) { | |||
x[j] = (gf31)(alpha * r[j] - t[j] + 31); | |||
} | |||
// NOTE(review): y and e hold M elements but the loop bound is N; this is
// only correct when N == M - confirm against params.h.
for (j = 0; j < N; j++) { | |||
y[j] = (gf31)(alpha * y[j] - e[j] + 31); | |||
} | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(x, x); | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(y, y); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, x, N); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf1, y, M); | |||
com_0(c + HASH_BYTES * (2 * i + 0), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0, packbuf1); | |||
} else { | |||
// Recompute commitment 1 from r:
// y = alpha * (pk - MQ(r)) - G(t, r) - e, with offsets 31 and 62 added
// before reduction.
PQCLEAN_MQDSS48_AVX2_MQ(y, r, F); | |||
PQCLEAN_MQDSS48_AVX2_G(z, t, r, F); | |||
// NOTE(review): same N-versus-M loop bound as in the b == 0 branch.
for (j = 0; j < N; j++) { | |||
y[j] = (gf31)(alpha * (31 + pk_gf31[j] - y[j]) - z[j] - e[j] + 62); | |||
} | |||
PQCLEAN_MQDSS48_AVX2_vgf31_shorten_unique(y, y); | |||
PQCLEAN_MQDSS48_AVX2_gf31_npack(packbuf0, y, M); | |||
com_1(c + HASH_BYTES * (2 * i + 1), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0); | |||
} | |||
// The commitment with index 1 - b is taken verbatim from the signature.
memcpy(c + HASH_BYTES * (2 * i + (1 - b)), sig + NPACKED_BYTES, HASH_BYTES); | |||
sig += NPACKED_BYTES + 2 * HASH_BYTES; | |||
} | |||
shake256_ctx_release(&shakestate); | |||
// Hash all commitments (in place, into the start of c) and compare with sigma_0.
H(c, c, HASH_BYTES * ROUNDS * 2); | |||
if (memcmp(c, sigma0, HASH_BYTES) != 0) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/** | |||
* Returns an array containing the signature followed by the message. | |||
*/ | |||
int PQCLEAN_MQDSS48_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
size_t siglen; | |||
PQCLEAN_MQDSS48_AVX2_crypto_sign_signature( | |||
sm, &siglen, m, mlen, sk); | |||
memmove(sm + SIG_LEN, m, mlen); | |||
*smlen = siglen + mlen; | |||
return 0; | |||
} | |||
/** | |||
* Verifies a given signature-message pair under a given public key. | |||
*/ | |||
int PQCLEAN_MQDSS48_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||
/* The API caller does not necessarily know what size a signature should be | |||
but MQDSS signatures are always exactly SIG_LEN. */ | |||
if (smlen < SIG_LEN) { | |||
memset(m, 0, smlen); | |||
*mlen = 0; | |||
return -1; | |||
} | |||
*mlen = smlen - SIG_LEN; | |||
if (PQCLEAN_MQDSS48_AVX2_crypto_sign_verify( | |||
sm, SIG_LEN, sm + SIG_LEN, *mlen, pk)) { | |||
memset(m, 0, smlen); | |||
*mlen = 0; | |||
return -1; | |||
} | |||
/* If verification was successful, move the message to the right place. */ | |||
memmove(m, sm + SIG_LEN, *mlen); | |||
return 0; | |||
} |
@@ -1,4 +1,3 @@ | |||
#include <assert.h> | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
@@ -16,3 +16,12 @@ auxiliary-submitters: | |||
implementations: | |||
- name: clean | |||
version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 | |||
- name: avx2 | |||
version: https://github.com/joostrijneveld/MQDSS/commit/00608d7610262ff07b1834885d32bc3fd27ef5e1 | |||
supported_platforms: | |||
- architecture: x86_64 | |||
required_flags: | |||
- avx2 | |||
- architecture: x86 | |||
required_flags: | |||
- avx2 |
@@ -0,0 +1,116 @@ | |||
CC0 1.0 Universal | |||
Statement of Purpose | |||
The laws of most jurisdictions throughout the world automatically confer | |||
exclusive Copyright and Related Rights (defined below) upon the creator and | |||
subsequent owner(s) (each and all, an "owner") of an original work of | |||
authorship and/or a database (each, a "Work"). | |||
Certain owners wish to permanently relinquish those rights to a Work for the | |||
purpose of contributing to a commons of creative, cultural and scientific | |||
works ("Commons") that the public can reliably and without fear of later | |||
claims of infringement build upon, modify, incorporate in other works, reuse | |||
and redistribute as freely as possible in any form whatsoever and for any | |||
purposes, including without limitation commercial purposes. These owners may | |||
contribute to the Commons to promote the ideal of a free culture and the | |||
further production of creative, cultural and scientific works, or to gain | |||
reputation or greater distribution for their Work in part through the use and | |||
efforts of others. | |||
For these and/or other purposes and motivations, and without any expectation | |||
of additional consideration or compensation, the person associating CC0 with a | |||
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright | |||
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work | |||
and publicly distribute the Work under its terms, with knowledge of his or her | |||
Copyright and Related Rights in the Work and the meaning and intended legal | |||
effect of CC0 on those rights. | |||
1. Copyright and Related Rights. A Work made available under CC0 may be | |||
protected by copyright and related or neighboring rights ("Copyright and | |||
Related Rights"). Copyright and Related Rights include, but are not limited | |||
to, the following: | |||
i. the right to reproduce, adapt, distribute, perform, display, communicate, | |||
and translate a Work; | |||
ii. moral rights retained by the original author(s) and/or performer(s); | |||
iii. publicity and privacy rights pertaining to a person's image or likeness | |||
depicted in a Work; | |||
iv. rights protecting against unfair competition in regards to a Work, | |||
subject to the limitations in paragraph 4(a), below; | |||
v. rights protecting the extraction, dissemination, use and reuse of data in | |||
a Work; | |||
vi. database rights (such as those arising under Directive 96/9/EC of the | |||
European Parliament and of the Council of 11 March 1996 on the legal | |||
protection of databases, and under any national implementation thereof, | |||
including any amended or successor version of such directive); and | |||
vii. other similar, equivalent or corresponding rights throughout the world | |||
based on applicable law or treaty, and any national implementations thereof. | |||
2. Waiver. To the greatest extent permitted by, but not in contravention of, | |||
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and | |||
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright | |||
and Related Rights and associated claims and causes of action, whether now | |||
known or unknown (including existing as well as future claims and causes of | |||
action), in the Work (i) in all territories worldwide, (ii) for the maximum | |||
duration provided by applicable law or treaty (including future time | |||
extensions), (iii) in any current or future medium and for any number of | |||
copies, and (iv) for any purpose whatsoever, including without limitation | |||
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes | |||
the Waiver for the benefit of each member of the public at large and to the | |||
detriment of Affirmer's heirs and successors, fully intending that such Waiver | |||
shall not be subject to revocation, rescission, cancellation, termination, or | |||
any other legal or equitable action to disrupt the quiet enjoyment of the Work | |||
by the public as contemplated by Affirmer's express Statement of Purpose. | |||
3. Public License Fallback. Should any part of the Waiver for any reason be | |||
judged legally invalid or ineffective under applicable law, then the Waiver | |||
shall be preserved to the maximum extent permitted taking into account | |||
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver | |||
is so judged Affirmer hereby grants to each affected person a royalty-free, | |||
non transferable, non sublicensable, non exclusive, irrevocable and | |||
unconditional license to exercise Affirmer's Copyright and Related Rights in | |||
the Work (i) in all territories worldwide, (ii) for the maximum duration | |||
provided by applicable law or treaty (including future time extensions), (iii) | |||
in any current or future medium and for any number of copies, and (iv) for any | |||
purpose whatsoever, including without limitation commercial, advertising or | |||
promotional purposes (the "License"). The License shall be deemed effective as | |||
of the date CC0 was applied by Affirmer to the Work. Should any part of the | |||
License for any reason be judged legally invalid or ineffective under | |||
applicable law, such partial invalidity or ineffectiveness shall not | |||
invalidate the remainder of the License, and in such case Affirmer hereby | |||
affirms that he or she will not (i) exercise any of his or her remaining | |||
Copyright and Related Rights in the Work or (ii) assert any associated claims | |||
and causes of action with respect to the Work, in either case contrary to | |||
Affirmer's express Statement of Purpose. | |||
4. Limitations and Disclaimers. | |||
a. No trademark or patent rights held by Affirmer are waived, abandoned, | |||
surrendered, licensed or otherwise affected by this document. | |||
b. Affirmer offers the Work as-is and makes no representations or warranties | |||
of any kind concerning the Work, express, implied, statutory or otherwise, | |||
including without limitation warranties of title, merchantability, fitness | |||
for a particular purpose, non infringement, or the absence of latent or | |||
other defects, accuracy, or the present or absence of errors, whether or not | |||
discoverable, all to the greatest extent permissible under applicable law. | |||
c. Affirmer disclaims responsibility for clearing rights of other persons | |||
that may apply to the Work or any use thereof, including without limitation | |||
any person's Copyright and Related Rights in the Work. Further, Affirmer | |||
disclaims responsibility for obtaining any necessary consents, permissions | |||
or other rights required for any use of the Work. | |||
d. Affirmer understands and acknowledges that Creative Commons is not a | |||
party to this document and has no duty or obligation with respect to this | |||
CC0 or use of the Work. | |||
For more information, please see | |||
<http://creativecommons.org/publicdomain/zero/1.0/> |
@@ -0,0 +1,22 @@ | |||
# This Makefile can be used with GNU Make or BSD Make | |||
# Static library built from the objects listed below.
LIB=libmqdss-64_avx2.a | |||
# Every object is rebuilt when any of these headers changes (see the %.o rule).
HEADERS = params.h gf31.h mq.h api.h | |||
OBJECTS = gf31.o mq.o sign.o | |||
# Warnings are errors (-Werror); -mavx2 enables the AVX2 intrinsics used by the
# sources; EXTRAFLAGS lets the caller append further flags.
CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror \ | |||
-Wmissing-prototypes -Wredundant-decls -std=c99 -mavx2 \ | |||
-I../../../common $(EXTRAFLAGS) | |||
all: $(LIB) | |||
%.o: %.c $(HEADERS) | |||
$(CC) $(CFLAGS) -c -o $@ $< | |||
$(LIB): $(OBJECTS) | |||
$(AR) -r $@ $(OBJECTS) | |||
# Remove the objects and the library.
clean: | |||
$(RM) $(OBJECTS) | |||
$(RM) $(LIB) | |||
@@ -0,0 +1,19 @@ | |||
# This Makefile can be used with Microsoft Visual Studio's nmake using the command:
#    nmake /f Makefile.Microsoft_nmake

# The library is named after this (AVX2) implementation, matching the
# libmqdss-64_avx2.a produced by the GNU/BSD Makefile, so that it does not
# clash with the library of the clean implementation.
LIBRARY=libmqdss-64_avx2.lib
OBJECTS=gf31.obj mq.obj sign.obj

# /arch:AVX2 enables the AVX2 instructions used by the sources; /W4 /WX turns
# all level-4 warnings into errors.
CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX /arch:AVX2

all: $(LIBRARY)

# Make sure objects are recompiled if headers change.
$(OBJECTS): *.h

$(LIBRARY): $(OBJECTS)
	LIB.EXE /NOLOGO /WX /OUT:$@ $**

clean:
	-DEL $(OBJECTS)
	-DEL $(LIBRARY)
@@ -0,0 +1,47 @@ | |||
/* Public interface of the AVX2 implementation of MQDSS-64. */
#ifndef PQCLEAN_MQDSS64_AVX2_API_H | |||
#define PQCLEAN_MQDSS64_AVX2_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
/* Algorithm name and object sizes. NOTE(review): the sizes are presumably in
   bytes, with CRYPTO_BYTES being the signature length - confirm against
   params.h. */
#define PQCLEAN_MQDSS64_AVX2_CRYPTO_ALGNAME "MQDSS-64" | |||
#define PQCLEAN_MQDSS64_AVX2_CRYPTO_SECRETKEYBYTES 24 | |||
#define PQCLEAN_MQDSS64_AVX2_CRYPTO_PUBLICKEYBYTES 64 | |||
#define PQCLEAN_MQDSS64_AVX2_CRYPTO_BYTES 59928 | |||
/**
* Generates an MQDSS key pair.
* The public key is written to pk and the secret key to sk.
* NOTE(review): the buffers presumably need room for
* CRYPTO_PUBLICKEYBYTES and CRYPTO_SECRETKEYBYTES bytes - confirm.
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk); | |||
/**
* Returns an array containing a detached signature.
* The signature over the mlen bytes at m is written to sig and its length
* is stored in *siglen.
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
/**
* Verifies a detached signature and message under a given public key.
* NOTE(review): presumably returns 0 when the signature is valid and a
* non-zero value otherwise - confirm against the implementation.
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
/**
* Returns an array containing the signature followed by the message.
* The combined length is stored in *smlen.
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
/**
* Verifies a given signature-message pair under a given public key.
* The recovered message is written to m and its length to *mlen.
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk); | |||
#endif | |||
@@ -0,0 +1,128 @@ | |||
#include "params.h" | |||
#include "fips202.h" | |||
#include "gf31.h" | |||
#include <assert.h> | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
/* Given a vector of N elements in the range [0, 31], this reduces the elements | |||
to the range [0, 30] by mapping 31 to 0 (i.e reduction mod 31) */ | |||
void PQCLEAN_MQDSS64_AVX2_vgf31_unique(gf31 *out, gf31 *in) { | |||
__m256i x; | |||
__m256i _w31 = _mm256_set1_epi16(31); | |||
int i; | |||
for (i = 0; i < (N >> 4); ++i) { | |||
x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); | |||
x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); | |||
_mm256_storeu_si256((__m256i *)(out + i * 16), x); | |||
} | |||
} | |||
/* This function acts on vectors with 64 gf31 elements. | |||
It performs one reduction step and guarantees output in [0, 30], | |||
but requires input to be in [0, 32768). */ | |||
void PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in) { | |||
__m256i x; | |||
__m256i _w2114 = _mm256_set1_epi32(2114 * 65536 + 2114); | |||
__m256i _w31 = _mm256_set1_epi16(31); | |||
int i; | |||
for (i = 0; i < (N >> 4); ++i) { | |||
x = _mm256_loadu_si256((__m256i const *) (in + 16 * i)); | |||
x = _mm256_sub_epi16(x, _mm256_mullo_epi16(_w31, _mm256_mulhi_epi16(x, _w2114))); | |||
x = _mm256_xor_si256(x, _mm256_and_si256(_w31, _mm256_cmpeq_epi16(x, _w31))); | |||
_mm256_storeu_si256((__m256i *)(out + i * 16), x); | |||
} | |||
} | |||
/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places | |||
them in a vector of 16-bit elements */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen) { | |||
size_t i = 0, j; | |||
shake256ctx shakestate; | |||
uint8_t shakeblock[SHAKE256_RATE]; | |||
shake256_absorb(&shakestate, seed, seedlen); | |||
while (i < len) { | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
for (j = 0; j < SHAKE256_RATE && i < len; j++) { | |||
if ((shakeblock[j] & 31) != 31) { | |||
out[i] = (shakeblock[j] & 31); | |||
i++; | |||
} | |||
} | |||
} | |||
shake256_ctx_release(&shakestate); | |||
} | |||
/* Given a seed, samples len gf31 elements, transposed into unsigned range, | |||
i.e. in the range [-15, 15], and places them in an array of 8-bit integers. | |||
This is used for the expansion of F, which wants packed elements. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen) { | |||
size_t i = 0, j; | |||
shake256ctx shakestate; | |||
uint8_t shakeblock[SHAKE256_RATE]; | |||
shake256_absorb(&shakestate, seed, seedlen); | |||
while (i < len) { | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
for (j = 0; j < SHAKE256_RATE && i < len; j++) { | |||
if ((shakeblock[j] & 31) != 31) { | |||
out[i] = (signed char)((shakeblock[j] & 31) - 15); | |||
i++; | |||
} | |||
} | |||
} | |||
shake256_ctx_release(&shakestate); | |||
} | |||
/* Unpacks an array of packed GF31 elements to one element per gf31. | |||
Assumes that there is sufficient empty space available at the end of the | |||
array to unpack. Can perform in-place. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n) { | |||
size_t i; | |||
size_t j = ((n * 5) >> 3) - 1; | |||
unsigned int d = 0; | |||
for (i = n; i > 0; i--) { | |||
out[i - 1] = (gf31)((in[j] >> d) & 31); | |||
d += 5; | |||
if (d > 8) { | |||
d -= 8; | |||
j--; | |||
out[i - 1] = (gf31)(out[i - 1] ^ ((in[j] << (5 - d)) & 31)); | |||
} | |||
} | |||
} | |||
/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values. | |||
Assumes that there is sufficient space available to unpack. | |||
Can perform in-place. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n) { | |||
unsigned int i = 0; | |||
unsigned int j; | |||
int d = 3; | |||
for (j = 0; j < n; j++) { | |||
assert(in[j] < 31); | |||
} | |||
/* There will be ceil(5n / 8) output blocks */ | |||
memset(out, 0, (size_t)((5 * n + 7) & ~7U) >> 3); | |||
for (j = 0; j < n; j++) { | |||
if (d < 0) { | |||
d += 8; | |||
out[i] = (uint8_t)((out[i] & (255 << (d - 3))) | | |||
((in[j] >> (8 - d)) & ~(255 << (d - 3)))); | |||
i++; | |||
} | |||
out[i] = (uint8_t)((out[i] & ~(31 << d)) | ((in[j] << d) & (31 << d))); | |||
d -= 5; | |||
} | |||
} |
@@ -0,0 +1,36 @@ | |||
#ifndef MQDSS_GF31_H | |||
#define MQDSS_GF31_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
/* One GF(31) element, stored in an unsigned 16-bit-or-wider integer. */
typedef unsigned short gf31; | |||
/* Given a vector of elements in the range [0, 31], this reduces the elements
to the range [0, 30] by mapping 31 to 0 (i.e. reduction mod 31). */ | |||
void PQCLEAN_MQDSS64_AVX2_vgf31_unique(gf31 *out, gf31 *in); | |||
/* Given a vector of 16-bit integers, this performs one reduction step and
maps a remaining 31 to 0, so the elements end up in the range [0, 30]
(i.e. reduction mod 31). The implementation documents that the input must
be in [0, 32768), not the full 16-bit range. */ | |||
void PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gf31 *out, gf31 *in); | |||
/* Given a seed, samples len gf31 elements (in the range [0, 30]), and places
them in a vector of 16-bit elements. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_nrand(gf31 *out, size_t len, const uint8_t *seed, size_t seedlen); | |||
/* Given a seed, samples len gf31 elements, transposed into signed range,
i.e. in the range [-15, 15], and places them in an array of 8-bit integers.
This is used for the expansion of F, which wants packed elements. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(signed char *out, size_t len, const uint8_t *seed, size_t seedlen); | |||
/* Unpacks an array of packed GF31 elements to one element per gf31.
Assumes that there is sufficient empty space available at the end of the
array to unpack. Can perform in-place. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_nunpack(gf31 *out, const uint8_t *in, size_t n); | |||
/* Packs an array of GF31 elements from gf31's to concatenated 5-bit values.
Assumes that there is sufficient space available to pack into.
NOTE(review): the implementation zeroes the output bytes before it reads
the input, so packing with out aliasing in does not look safe - confirm
before relying on in-place use. */ | |||
void PQCLEAN_MQDSS64_AVX2_gf31_npack(uint8_t *out, const gf31 *in, size_t n); | |||
#endif | |||
@@ -0,0 +1,239 @@ | |||
#include "mq.h" | |||
#include "params.h" | |||
#include <immintrin.h> | |||
#include <stdio.h> | |||
/* One reduction step on sixteen signed 16-bit lanes: computes
   r - _w31 * mulhi(r, _w2114) lane-wise. The callers in this file pass 31 in
   every lane of _w31 and 2114 in every lane of _w2114. */
static inline __m256i reduce_16(__m256i r, __m256i _w31, __m256i _w2114) {
    const __m256i quotient = _mm256_mulhi_epi16(r, _w2114);
    const __m256i multiple = _mm256_mullo_epi16(_w31, quotient);

    return _mm256_sub_epi16(r, multiple);
}
/* Computes all products x_i * x_j with j <= i and writes them to xij as one
byte per product, after one reduce_16 step. Row i (the products of x[i] with
x[0..i]) starts at byte offset i * (i + 1) / 2, since the write position
advances by i + 1 per row.
Reads x[0..63] through four fixed 16-element loads.
NOTE(review): each row is stored as a full 32- or 64-byte vector and the
excess is overwritten by the following rows; the last row therefore writes
past the final product, so xij presumably has slack at the end - confirm
against the caller. */ | |||
inline static | |||
void generate_quadratic_terms( unsigned char *xij, const gf31 *x ) { | |||
__m256i mask_2114 = _mm256_set1_epi16( 2114 ); | |||
__m256i mask_31 = _mm256_set1_epi16( 31 ); | |||
__m256i xi[4]; | |||
xi[0] = _mm256_loadu_si256((__m256i const *) (x)); | |||
xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); | |||
xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); | |||
xi[3] = _mm256_loadu_si256((__m256i const *) (x + 48)); | |||
// Product vectors; chunks that are not recomputed for a row keep their
// previous contents (initially zero).
__m256i xixj[4]; | |||
xixj[0] = _mm256_setzero_si256(); | |||
xixj[1] = _mm256_setzero_si256(); | |||
xixj[2] = _mm256_setzero_si256(); | |||
xixj[3] = _mm256_setzero_si256(); | |||
// Byte offset in xij at which the current row is written.
int k = 0; | |||
// Rows 0..31: at most the first two 16-element chunks are needed, so a
// single 32-byte store per row suffices.
for (int i = 0; i < 32; i++) { | |||
__m256i br_xi = _mm256_set1_epi16( (short)x[i] ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
// packs narrows to bytes per 128-bit half, interleaving the two inputs;
// the permute restores element order.
__m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r ); | |||
k += i + 1; | |||
} | |||
// Rows 32..N-1: up to four chunks are needed, stored as two 32-byte vectors.
for (int i = 32; i < N; i++) { | |||
__m256i br_xi = _mm256_set1_epi16( (short)x[i] ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_mullo_epi16( xi[j], br_xi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
__m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); | |||
__m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); | |||
r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); | |||
k += i + 1; | |||
} | |||
} | |||
/* Computes all terms (x_i * y_j) + (x_j * y_i) with j <= i and writes them to
xij as one byte per term, after one reduce_16 step. The output layout is the
same as in generate_quadratic_terms: row i starts at byte offset
i * (i + 1) / 2, and each row is stored as a full 32- or 64-byte vector whose
excess is overwritten by later rows.
Reads x[0..63] and y[0..63] through fixed 16-element loads.
NOTE(review): the byte interleaving below assumes every element of x and y
fits in one byte (upper byte of each 16-bit entry zero) - confirm that callers
only pass reduced values. */ | |||
inline static | |||
void generate_xiyj_p_xjyi_terms( unsigned char *xij, const gf31 *x, const gf31 *y ) { | |||
__m256i mask_2114 = _mm256_set1_epi16( 2114 ); | |||
__m256i mask_31 = _mm256_set1_epi16( 31 ); | |||
// Each 16-bit lane of xiyi[] combines x_j (low byte) with y_j shifted up by
// one byte (high byte).
__m256i xiyi[4]; | |||
xiyi[0] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y)), 1 )); | |||
xiyi[1] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 16)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 16)), 1 )); | |||
xiyi[2] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 32)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 32)), 1 )); | |||
xiyi[3] = _mm256_xor_si256(_mm256_loadu_si256((__m256i const *) (x + 48)), _mm256_slli_si256( _mm256_loadu_si256((__m256i const *) (y + 48)), 1 )); | |||
__m256i xixj[4]; | |||
xixj[0] = _mm256_setzero_si256(); | |||
xixj[1] = _mm256_setzero_si256(); | |||
xixj[2] = _mm256_setzero_si256(); | |||
xixj[3] = _mm256_setzero_si256(); | |||
// Byte offset in xij at which the current row is written.
int k = 0; | |||
// Rows 0..31: one 32-byte store per row.
for (int i = 0; i < 32; i++) { | |||
// Broadcast with the bytes swapped relative to xiyi (y_i low, x_i high), so
// the byte-wise multiply-add yields x_j * y_i + y_j * x_i per lane.
__m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
__m256i r = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r = _mm256_permute4x64_epi64(r, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r ); | |||
k += i + 1; | |||
} | |||
// Rows 32..N-1: two 32-byte stores per row.
for (int i = 32; i < N; i++) { | |||
__m256i br_yixi = _mm256_set1_epi16( (short)((x[i] << 8)^y[i]) ); | |||
for (int j = 0; j <= (i >> 4); j++) { | |||
xixj[j] = _mm256_maddubs_epi16( xiyi[j], br_yixi ); | |||
xixj[j] = reduce_16( xixj[j], mask_31, mask_2114 ); | |||
} | |||
__m256i r0 = _mm256_packs_epi16(xixj[0], xixj[1]); | |||
r0 = _mm256_permute4x64_epi64(r0, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + k ), r0 ); | |||
__m256i r1 = _mm256_packs_epi16(xixj[2], xixj[3]); | |||
r1 = _mm256_permute4x64_epi64(r1, 0xd8); // 3,1,2,0 | |||
_mm256_storeu_si256( (__m256i *)( xij + 32 + k ), r1 ); | |||
k += i + 1; | |||
} | |||
} | |||
/* Processes the eight 16-bit words in the lower 128 bits of xx. Each word is
broadcast to a full vector; for each of the N/16 accumulators yy[macro_j],
32 coefficient bytes are loaded from F, multiplied with the broadcast word via
_mm256_maddubs_epi16 and added to the accumulator.
Relies on two names from the expanding scope that are not macro parameters:
the accumulator array yy and the coefficient pointer F, which is advanced by
32 after every load. */
#define EVAL_YMM_0(xx) {\ | |||
__m128i tmp = _mm256_castsi256_si128(xx); \ | |||
for (int macro_i = 0; macro_i < 8; macro_i++) { \ | |||
__m256i _xi = _mm256_broadcastw_epi16(tmp); \ | |||
tmp = _mm_srli_si128(tmp, 2); \ | |||
for (int macro_j = 0; macro_j < (N/16); macro_j++) { \ | |||
__m256i coeff = _mm256_loadu_si256((__m256i const *) F); \ | |||
F += 32; \ | |||
yy[macro_j] = _mm256_add_epi16(yy[macro_j], _mm256_maddubs_epi16(_xi, coeff)); \ | |||
} \ | |||
} \ | |||
} | |||
/* Multiply-accumulate step over the HIGH 128-bit lane of xx.
   Identical to the low-lane variant except that the 16 input bytes are taken
   from the upper half of xx: each of the 8 byte pairs is broadcast, multiplied
   (maddubs) with N/16 consecutive 32-byte coefficient vectors from F and
   accumulated into yy[0 .. N/16 - 1].
   Side effects: advances F by 8 * (N/16) * 32 bytes and updates yy[].
   Requires `F`, `yy` and `N` to be in scope at the expansion site. The
   expansion is a brace block and is invoked without a trailing semicolon. */
#define EVAL_YMM_1(xx) {\
    __m128i macro_lane = _mm256_extracti128_si256(xx, 1); \
    for (int macro_pair = 0; macro_pair < 8; macro_pair++) { \
        const __m256i macro_bcast = _mm256_broadcastw_epi16(macro_lane); \
        macro_lane = _mm_srli_si128(macro_lane, 2); \
        for (int macro_out = 0; macro_out < (N/16); macro_out++) { \
            const __m256i macro_prod = _mm256_maddubs_epi16(macro_bcast, _mm256_loadu_si256((__m256i const *) F)); \
            yy[macro_out] = _mm256_add_epi16(yy[macro_out], macro_prod); \
            F += 32; \
        } \
    } \
}
/* Runs reduce_16 over each of the four accumulator vectors yy[0..3], in place.
   Requires `mask_reduce` and `mask_2114` to be in scope at the expansion site.
   NOTE(review): reduce_16 is defined outside this excerpt; presumably a
   partial reduction modulo 31 that keeps the 16-bit accumulators from
   overflowing -- confirm against its definition. */
#define REDUCE_(yy) { \
    for (int macro_r = 0; macro_r < 4; macro_r++) { \
        (yy)[macro_r] = reduce_16((yy)[macro_r], mask_reduce, mask_2114); \
    } \
}
/* Evaluates the MQ function on a vector of N gf31 elements x (expected to be | |||
in reduced 5-bit representation). Expects the coefficients in F to be in | |||
signed representation (i.e. [-15, 15], packed bytewise). | |||
Outputs M gf31 elements in unique 16-bit representation as fx. */ | |||
void PQCLEAN_MQDSS64_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F) { | |||
__m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); | |||
__m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); | |||
__m256i xi[4]; | |||
xi[0] = _mm256_loadu_si256((__m256i const *) (x)); | |||
xi[1] = _mm256_loadu_si256((__m256i const *) (x + 16)); | |||
xi[2] = _mm256_loadu_si256((__m256i const *) (x + 32)); | |||
xi[3] = _mm256_loadu_si256((__m256i const *) (x + 48)); | |||
__m256i _zero = _mm256_setzero_si256(); | |||
xi[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[0])), xi[0]); | |||
xi[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[1])), xi[1]); | |||
xi[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[2])), xi[2]); | |||
xi[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_zero, xi[3])), xi[3]); | |||
__m256i x1 = _mm256_packs_epi16(xi[0], xi[1]); | |||
x1 = _mm256_permute4x64_epi64(x1, 0xd8); // 3,1,2,0 | |||
__m256i x2 = _mm256_packs_epi16(xi[2], xi[3]); | |||
x2 = _mm256_permute4x64_epi64(x2, 0xd8); // 3,1,2,0 | |||
__m256i yy[M / 16]; | |||
yy[0] = _zero; | |||
yy[1] = _zero; | |||
yy[2] = _zero; | |||
yy[3] = _zero; | |||
EVAL_YMM_0(x1) | |||
EVAL_YMM_1(x1) | |||
EVAL_YMM_0(x2) | |||
EVAL_YMM_1(x2) | |||
REDUCE_(yy) | |||
__m256i xixj[65]; | |||
generate_quadratic_terms( (unsigned char *) xixj, x ); | |||
for (int i = 0 ; i < 64 ; i += 2) { | |||
EVAL_YMM_0(xixj[i]) | |||
EVAL_YMM_1(xixj[i]) | |||
EVAL_YMM_0(xixj[i + 1]) | |||
EVAL_YMM_1(xixj[i + 1]) | |||
REDUCE_(yy) | |||
} | |||
EVAL_YMM_0(xixj[64]) | |||
EVAL_YMM_1(xixj[64]) | |||
REDUCE_(yy) | |||
yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); | |||
yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); | |||
yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); | |||
yy[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[3])), yy[3]); | |||
for (int i = 0; i < (N / 16); ++i) { | |||
_mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); | |||
} | |||
} | |||
/* Evaluates the bilinear polar form of the MQ function (i.e. G) on a vector of | |||
N gf31 elements x (expected to be in reduced 5-bit representation). Expects | |||
the coefficients in F to be in signed representation (i.e. [-15, 15], packed | |||
bytewise). Outputs M gf31 elements in unique 16-bit representation as fx. */ | |||
void PQCLEAN_MQDSS64_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F) { | |||
__m256i mask_2114 = _mm256_set1_epi32(2114 * 65536 + 2114); | |||
__m256i mask_reduce = _mm256_srli_epi16(_mm256_cmpeq_epi16(mask_2114, mask_2114), 11); | |||
__m256i _zero = _mm256_setzero_si256(); | |||
__m256i yy[(M / 16)]; | |||
yy[0] = _zero; | |||
yy[1] = _zero; | |||
yy[2] = _zero; | |||
yy[3] = _zero; | |||
F += N * M; | |||
__m256i xixj[65]; | |||
generate_xiyj_p_xjyi_terms( (unsigned char *) xixj, x, y ); | |||
for (int i = 0 ; i < 64 ; i += 2) { | |||
EVAL_YMM_0(xixj[i]) | |||
EVAL_YMM_1(xixj[i]) | |||
EVAL_YMM_0(xixj[i + 1]) | |||
EVAL_YMM_1(xixj[i + 1]) | |||
REDUCE_(yy) | |||
} | |||
EVAL_YMM_0(xixj[64]) | |||
EVAL_YMM_1(xixj[64]) | |||
REDUCE_(yy) | |||
yy[0] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[0])), yy[0]); | |||
yy[1] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[1])), yy[1]); | |||
yy[2] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[2])), yy[2]); | |||
yy[3] = _mm256_add_epi16(_mm256_and_si256(mask_reduce, _mm256_cmpgt_epi16(_mm256_setzero_si256(), yy[3])), yy[3]); | |||
for (int i = 0; i < (N / 16); ++i) { | |||
_mm256_storeu_si256((__m256i *)(fx + i * 16), yy[i]); | |||
} | |||
} |
@@ -0,0 +1,18 @@ | |||
#ifndef MQDSS_MQ_H
#define MQDSS_MQ_H

#include "gf31.h"

/* MQ function evaluation.
   x:  N gf31 elements, expected in reduced 5-bit representation.
   F:  system coefficients in signed representation ([-15, 15]), one per byte.
   fx: receives M gf31 elements in unique 16-bit representation. */
void PQCLEAN_MQDSS64_AVX2_MQ(gf31 *fx, const gf31 *x, const signed char *F);

/* Bilinear polar form G of the MQ function.
   x:  N gf31 elements, expected in reduced 5-bit representation.
   y:  second input vector of gf31 elements.
   F:  system coefficients in signed representation ([-15, 15]), one per byte.
   fx: receives M gf31 elements in unique 16-bit representation. */
void PQCLEAN_MQDSS64_AVX2_G(gf31 *fx, const gf31 *x, const gf31 *y, const signed char *F);

#endif
@@ -0,0 +1,25 @@ | |||
#ifndef MQDSS_PARAMS_H
#define MQDSS_PARAMS_H

/* Number of variables (N) and equations (M) of the MQ system. */
#define N 64
#define M N

/* Number of elements in F: per equation, N * (N + 1) / 2 quadratic
   coefficients plus N linear ones. */
#define F_LEN (M * (((N * (N + 1)) / 2) + N))

#define ROUNDS 277

/* Byte counts for N, M and F_LEN elements stored as a continuous stream of
   5-bit values. Assumes N and M to be multiples of 8. */
#define NPACKED_BYTES ((N * 5) / 8)
#define MPACKED_BYTES ((M * 5) / 8)
#define FPACKED_BYTES ((F_LEN * 5) / 8)

#define HASH_BYTES 48
#define SEED_BYTES 24

#define PK_BYTES (SEED_BYTES + MPACKED_BYTES)
#define SK_BYTES SEED_BYTES

// R, sigma_0, ROUNDS * (t1, r{0,1}, e1, c, rho)
#define SIG_LEN (2 * HASH_BYTES + ROUNDS * (2*NPACKED_BYTES + MPACKED_BYTES + HASH_BYTES + HASH_BYTES))

#endif
@@ -0,0 +1,389 @@ | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "fips202.h" | |||
#include "gf31.h" | |||
#include "mq.h" | |||
#include "params.h" | |||
#include "randombytes.h" | |||
/* Takes an array of len bytes and computes a hash digest. | |||
This is used as a hash function in the Fiat-Shamir transform. */ | |||
static void H(unsigned char *out, const unsigned char *in, const size_t len) { | |||
shake256(out, HASH_BYTES, in, len); | |||
} | |||
/* Takes two arrays of N packed elements and an array of M packed elements, | |||
and computes a HASH_BYTES commitment. */ | |||
static void com_0(unsigned char *c, | |||
const unsigned char *rho, | |||
const unsigned char *inn, const unsigned char *inn2, | |||
const unsigned char *inm) { | |||
unsigned char buffer[HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES]; | |||
memcpy(buffer, rho, HASH_BYTES); | |||
memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); | |||
memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inn2, NPACKED_BYTES); | |||
memcpy(buffer + HASH_BYTES + 2 * NPACKED_BYTES, inm, MPACKED_BYTES); | |||
shake256(c, HASH_BYTES, buffer, HASH_BYTES + 2 * NPACKED_BYTES + MPACKED_BYTES); | |||
} | |||
/* Takes an array of N packed elements and an array of M packed elements, | |||
and computes a HASH_BYTES commitment. */ | |||
static void com_1(unsigned char *c, | |||
const unsigned char *rho, | |||
const unsigned char *inn, const unsigned char *inm) { | |||
unsigned char buffer[HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES]; | |||
memcpy(buffer, rho, HASH_BYTES); | |||
memcpy(buffer + HASH_BYTES, inn, NPACKED_BYTES); | |||
memcpy(buffer + HASH_BYTES + NPACKED_BYTES, inm, MPACKED_BYTES); | |||
shake256(c, HASH_BYTES, buffer, HASH_BYTES + NPACKED_BYTES + MPACKED_BYTES); | |||
} | |||
/* | |||
* Generates an MQDSS key pair. | |||
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
signed char F[F_LEN]; | |||
unsigned char skbuf[SEED_BYTES * 2]; | |||
gf31 sk_gf31[N]; | |||
gf31 pk_gf31[M]; | |||
// Expand sk to obtain a seed for F and the secret input s. | |||
// We also expand to obtain a value for sampling r0, t0 and e0 during | |||
// signature generation, but that is not relevant here. | |||
randombytes(sk, SEED_BYTES); | |||
shake256(skbuf, SEED_BYTES * 2, sk, SEED_BYTES); | |||
memcpy(pk, skbuf, SEED_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_MQ(pk_gf31, sk_gf31, F); | |||
PQCLEAN_MQDSS64_AVX2_vgf31_unique(pk_gf31, pk_gf31); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); | |||
return 0; | |||
} | |||
/** | |||
* Returns an array containing a detached signature. | |||
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
signed char F[F_LEN]; | |||
unsigned char skbuf[SEED_BYTES * 4]; | |||
gf31 pk_gf31[M]; | |||
unsigned char pk[SEED_BYTES + MPACKED_BYTES]; | |||
// Concatenated for convenient hashing. | |||
unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; | |||
unsigned char *D = D_sigma0_h0_sigma1; | |||
unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; | |||
unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; | |||
unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; | |||
unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; | |||
shake256ctx shakestate; | |||
unsigned char shakeblock[SHAKE256_RATE]; | |||
unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; | |||
unsigned char rnd_seed[HASH_BYTES + SEED_BYTES]; | |||
unsigned char rho[2 * ROUNDS * HASH_BYTES]; | |||
unsigned char *rho0 = rho; | |||
unsigned char *rho1 = rho + ROUNDS * HASH_BYTES; | |||
gf31 sk_gf31[N]; | |||
gf31 rnd[(2 * N + M) * ROUNDS]; // Concatenated for easy RNG. | |||
gf31 *r0 = rnd; | |||
gf31 *t0 = rnd + N * ROUNDS; | |||
gf31 *e0 = rnd + 2 * N * ROUNDS; | |||
gf31 r1[N * ROUNDS]; | |||
gf31 t1[N * ROUNDS]; | |||
gf31 e1[M * ROUNDS]; | |||
gf31 gx[M * ROUNDS]; | |||
unsigned char packbuf0[NPACKED_BYTES]; | |||
unsigned char packbuf1[NPACKED_BYTES]; | |||
unsigned char packbuf2[MPACKED_BYTES]; | |||
unsigned char c[HASH_BYTES * ROUNDS * 2]; | |||
gf31 alpha; | |||
int alpha_count = 0; | |||
int b; | |||
int i, j; | |||
shake256incctx state; | |||
shake256(skbuf, SEED_BYTES * 4, sk, SEED_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, skbuf, SEED_BYTES); | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, sk, SEED_BYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(sig, HASH_BYTES, &state); // Compute R. | |||
shake256_inc_ctx_release(&state); | |||
memcpy(pk, skbuf, SEED_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nrand(sk_gf31, N, skbuf + SEED_BYTES, SEED_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_MQ(pk_gf31, sk_gf31, F); | |||
PQCLEAN_MQDSS64_AVX2_vgf31_unique(pk_gf31, pk_gf31); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(pk + SEED_BYTES, pk_gf31, M); | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, pk, PK_BYTES); | |||
shake256_inc_absorb(&state, sig, HASH_BYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(D, HASH_BYTES, &state); | |||
shake256_inc_ctx_release(&state); | |||
sig += HASH_BYTES; // Compensate for prefixed R. | |||
memcpy(rnd_seed, skbuf + 2 * SEED_BYTES, SEED_BYTES); | |||
memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); | |||
shake256(rho, 2 * ROUNDS * HASH_BYTES, rnd_seed, SEED_BYTES + HASH_BYTES); | |||
memcpy(rnd_seed, skbuf + 3 * SEED_BYTES, SEED_BYTES); | |||
memcpy(rnd_seed + SEED_BYTES, D, HASH_BYTES); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nrand(rnd, (2 * N + M) * ROUNDS, rnd_seed, SEED_BYTES + HASH_BYTES); | |||
for (i = 0; i < ROUNDS; i++) { | |||
for (j = 0; j < N; j++) { | |||
r1[j + i * N] = (gf31)(31 + sk_gf31[j] - r0[j + i * N]); | |||
} | |||
PQCLEAN_MQDSS64_AVX2_G(gx + i * M, t0 + i * N, r1 + i * N, F); | |||
} | |||
for (i = 0; i < ROUNDS * M; i++) { | |||
gx[i] = (gf31)(gx[i] + e0[i]); | |||
} | |||
for (i = 0; i < ROUNDS; i++) { | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, r0 + i * N, N); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, t0 + i * N, N); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf2, e0 + i * M, M); | |||
com_0(c + HASH_BYTES * (2 * i + 0), rho0 + i * HASH_BYTES, packbuf0, packbuf1, packbuf2); | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(r1 + i * N, r1 + i * N); | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(gx + i * M, gx + i * M); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, r1 + i * N, N); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, gx + i * M, M); | |||
com_1(c + HASH_BYTES * (2 * i + 1), rho1 + i * HASH_BYTES, packbuf0, packbuf1); | |||
} | |||
H(sigma0, c, HASH_BYTES * ROUNDS * 2); // Compute sigma_0. | |||
shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
memcpy(h0, shakeblock, HASH_BYTES); | |||
memcpy(sig, sigma0, HASH_BYTES); | |||
sig += HASH_BYTES; // Compensate for sigma_0. | |||
for (i = 0; i < ROUNDS; i++) { | |||
do { | |||
alpha = shakeblock[alpha_count] & 31; | |||
alpha_count++; | |||
if (alpha_count == SHAKE256_RATE) { | |||
alpha_count = 0; | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
} | |||
} while (alpha == 31); | |||
for (j = 0; j < N; j++) { | |||
t1[i * N + j] = (gf31)(alpha * r0[j + i * N] - t0[j + i * N] + 31); | |||
} | |||
PQCLEAN_MQDSS64_AVX2_MQ(e1 + i * M, r0 + i * N, F); | |||
for (j = 0; j < N; j++) { | |||
e1[i * N + j] = (gf31)(alpha * e1[j + i * M] - e0[j + i * M] + 31); | |||
} | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(t1 + i * N, t1 + i * N); | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(e1 + i * N, e1 + i * N); | |||
} | |||
shake256_ctx_release(&shakestate); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(t1packed, t1, N * ROUNDS); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(e1packed, e1, M * ROUNDS); | |||
memcpy(sig, t1packed, NPACKED_BYTES * ROUNDS); | |||
sig += NPACKED_BYTES * ROUNDS; | |||
memcpy(sig, e1packed, MPACKED_BYTES * ROUNDS); | |||
sig += MPACKED_BYTES * ROUNDS; | |||
shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); | |||
for (i = 0; i < ROUNDS; i++) { | |||
b = (h1[(i >> 3)] >> (i & 7)) & 1; | |||
if (b == 0) { | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(sig, r0 + i * N, N); | |||
} else if (b == 1) { | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(sig, r1 + i * N, N); | |||
} | |||
memcpy(sig + NPACKED_BYTES, c + HASH_BYTES * (2 * i + (1 - b)), HASH_BYTES); | |||
memcpy(sig + NPACKED_BYTES + HASH_BYTES, rho + (i + b * ROUNDS) * HASH_BYTES, HASH_BYTES); | |||
sig += NPACKED_BYTES + 2 * HASH_BYTES; | |||
} | |||
*siglen = SIG_LEN; | |||
return 0; | |||
} | |||
/** | |||
* Verifies a detached signature and message under a given public key. | |||
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
gf31 r[N]; | |||
gf31 t[N]; | |||
gf31 e[M]; | |||
signed char F[F_LEN]; | |||
gf31 pk_gf31[M]; | |||
// Concatenated for convenient hashing. | |||
unsigned char D_sigma0_h0_sigma1[HASH_BYTES * 3 + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)]; | |||
unsigned char *D = D_sigma0_h0_sigma1; | |||
unsigned char *sigma0 = D_sigma0_h0_sigma1 + HASH_BYTES; | |||
unsigned char *h0 = D_sigma0_h0_sigma1 + 2 * HASH_BYTES; | |||
unsigned char *t1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES; | |||
unsigned char *e1packed = D_sigma0_h0_sigma1 + 3 * HASH_BYTES + ROUNDS * NPACKED_BYTES; | |||
unsigned char h1[((ROUNDS + 7) & ~7) >> 3]; | |||
unsigned char c[HASH_BYTES * ROUNDS * 2]; | |||
memset(c, 0, HASH_BYTES * 2); | |||
gf31 x[N]; | |||
gf31 y[M]; | |||
gf31 z[M]; | |||
unsigned char packbuf0[NPACKED_BYTES]; | |||
unsigned char packbuf1[MPACKED_BYTES]; | |||
shake256ctx shakestate; | |||
unsigned char shakeblock[SHAKE256_RATE]; | |||
int i, j; | |||
gf31 alpha; | |||
int alpha_count = 0; | |||
int b; | |||
shake256incctx state; | |||
if (siglen != SIG_LEN) { | |||
return -1; | |||
} | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, pk, PK_BYTES); | |||
shake256_inc_absorb(&state, sig, HASH_BYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(D, HASH_BYTES, &state); | |||
shake256_inc_ctx_release(&state); | |||
sig += HASH_BYTES; | |||
PQCLEAN_MQDSS64_AVX2_gf31_nrand_schar(F, F_LEN, pk, SEED_BYTES); | |||
pk += SEED_BYTES; | |||
PQCLEAN_MQDSS64_AVX2_gf31_nunpack(pk_gf31, pk, M); | |||
memcpy(sigma0, sig, HASH_BYTES); | |||
shake256_absorb(&shakestate, D_sigma0_h0_sigma1, 2 * HASH_BYTES); | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
memcpy(h0, shakeblock, HASH_BYTES); | |||
sig += HASH_BYTES; | |||
memcpy(t1packed, sig, ROUNDS * NPACKED_BYTES); | |||
sig += ROUNDS * NPACKED_BYTES; | |||
memcpy(e1packed, sig, ROUNDS * MPACKED_BYTES); | |||
sig += ROUNDS * MPACKED_BYTES; | |||
shake256(h1, ((ROUNDS + 7) & ~7) >> 3, D_sigma0_h0_sigma1, 3 * HASH_BYTES + ROUNDS * (NPACKED_BYTES + MPACKED_BYTES)); | |||
for (i = 0; i < ROUNDS; i++) { | |||
do { | |||
alpha = shakeblock[alpha_count] & 31; | |||
alpha_count++; | |||
if (alpha_count == SHAKE256_RATE) { | |||
alpha_count = 0; | |||
shake256_squeezeblocks(shakeblock, 1, &shakestate); | |||
} | |||
} while (alpha == 31); | |||
b = (h1[(i >> 3)] >> (i & 7)) & 1; | |||
PQCLEAN_MQDSS64_AVX2_gf31_nunpack(r, sig, N); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nunpack(t, t1packed + NPACKED_BYTES * i, N); | |||
PQCLEAN_MQDSS64_AVX2_gf31_nunpack(e, e1packed + MPACKED_BYTES * i, M); | |||
if (b == 0) { | |||
PQCLEAN_MQDSS64_AVX2_MQ(y, r, F); | |||
for (j = 0; j < N; j++) { | |||
x[j] = (gf31)(alpha * r[j] - t[j] + 31); | |||
} | |||
for (j = 0; j < N; j++) { | |||
y[j] = (gf31)(alpha * y[j] - e[j] + 31); | |||
} | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(x, x); | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(y, y); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, x, N); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf1, y, M); | |||
com_0(c + HASH_BYTES * (2 * i + 0), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0, packbuf1); | |||
} else { | |||
PQCLEAN_MQDSS64_AVX2_MQ(y, r, F); | |||
PQCLEAN_MQDSS64_AVX2_G(z, t, r, F); | |||
for (j = 0; j < N; j++) { | |||
y[j] = (gf31)(alpha * (31 + pk_gf31[j] - y[j]) - z[j] - e[j] + 62); | |||
} | |||
PQCLEAN_MQDSS64_AVX2_vgf31_shorten_unique(y, y); | |||
PQCLEAN_MQDSS64_AVX2_gf31_npack(packbuf0, y, M); | |||
com_1(c + HASH_BYTES * (2 * i + 1), sig + HASH_BYTES + NPACKED_BYTES, sig, packbuf0); | |||
} | |||
memcpy(c + HASH_BYTES * (2 * i + (1 - b)), sig + NPACKED_BYTES, HASH_BYTES); | |||
sig += NPACKED_BYTES + 2 * HASH_BYTES; | |||
} | |||
shake256_ctx_release(&shakestate); | |||
H(c, c, HASH_BYTES * ROUNDS * 2); | |||
if (memcmp(c, sigma0, HASH_BYTES) != 0) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/** | |||
* Returns an array containing the signature followed by the message. | |||
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
size_t siglen; | |||
PQCLEAN_MQDSS64_AVX2_crypto_sign_signature( | |||
sm, &siglen, m, mlen, sk); | |||
memmove(sm + SIG_LEN, m, mlen); | |||
*smlen = siglen + mlen; | |||
return 0; | |||
} | |||
/** | |||
* Verifies a given signature-message pair under a given public key. | |||
*/ | |||
int PQCLEAN_MQDSS64_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||
/* The API caller does not necessarily know what size a signature should be | |||
but MQDSS signatures are always exactly SIG_LEN. */ | |||
if (smlen < SIG_LEN) { | |||
memset(m, 0, smlen); | |||
*mlen = 0; | |||
return -1; | |||
} | |||
*mlen = smlen - SIG_LEN; | |||
if (PQCLEAN_MQDSS64_AVX2_crypto_sign_verify( | |||
sm, SIG_LEN, sm + SIG_LEN, *mlen, pk)) { | |||
memset(m, 0, smlen); | |||
*mlen = 0; | |||
return -1; | |||
} | |||
/* If verification was successful, move the message to the right place. */ | |||
memmove(m, sm + SIG_LEN, *mlen); | |||
return 0; | |||
} |
@@ -1,4 +1,3 @@ | |||
#include <assert.h> | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#include <string.h> | |||
@@ -0,0 +1,20 @@ | |||
consistency_checks: | |||
- source: | |||
scheme: mqdss-48 | |||
implementation: avx2 | |||
files: | |||
- api.h | |||
- mq.h | |||
- LICENSE | |||
- mq.h | |||
- sign.c | |||
- params.h | |||
- source: | |||
scheme: mqdss-64 | |||
implementation: clean | |||
files: | |||
- gf31.c | |||
- gf31.h | |||
- LICENSE | |||
- mq.c | |||
- mq.h |
@@ -9,3 +9,14 @@ consistency_checks: | |||
- mq.c | |||
- mq.h | |||
- sign.c | |||
- source: | |||
scheme: mqdss-64 | |||
implementation: avx2 | |||
files: | |||
- api.h | |||
- mq.h | |||
- LICENSE | |||
- mq.h | |||
- sign.c | |||
- params.h | |||
@@ -40,6 +40,7 @@ def test_testvectors(implementation, impl_path, test_dir, init, destr): | |||
implementation.name, | |||
'.exe' if os.name == 'nt' else '' | |||
))], | |||
print_output=False, | |||
).replace('\r', '') | |||
assert(implementation.scheme.metadata()['testvectors-sha256'].lower() | |||
== hashlib.sha256(out.encode('utf-8')).hexdigest().lower()) | |||