From 4a301f189684f2b85f31b74b3369ea4d1c485ac0 Mon Sep 17 00:00:00 2001 From: "John M. Schanck" Date: Wed, 9 Sep 2020 09:44:31 -0400 Subject: [PATCH] Simplify hqc-rmrs*/clean/reed_muller.c and fix potentially non-constant time behavior. --- crypto_kem/hqc-rmrs-128/clean/reed_muller.c | 120 +++++++++----------- crypto_kem/hqc-rmrs-192/clean/reed_muller.c | 120 +++++++++----------- crypto_kem/hqc-rmrs-256/clean/reed_muller.c | 120 +++++++++----------- 3 files changed, 156 insertions(+), 204 deletions(-) diff --git a/crypto_kem/hqc-rmrs-128/clean/reed_muller.c b/crypto_kem/hqc-rmrs-128/clean/reed_muller.c index eca7d8f9..36e80ff6 100644 --- a/crypto_kem/hqc-rmrs-128/clean/reed_muller.c +++ b/crypto_kem/hqc-rmrs-128/clean/reed_muller.c @@ -7,33 +7,19 @@ * Constant time implementation of Reed-Muller code RM(1,7) */ -// setting this will help the compiler with auto vectorization -#undef ALIGNVECTORS - // number of repeated code words #define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) -// codeword is 128 bits, seen multiple ways -typedef union { - uint8_t u8[16]; - uint32_t u32[4]; -} codeword -; - -// Expanded codeword has a short for every bit, for internal calculations -typedef int16_t expandedCodeword[128] -; - // copy bit 0 into all bits of a 32 bit value -#define BIT0MASK(x) (int32_t)(-((x) & 1)) +#define BIT0MASK(x) (-((x) & 1)) -static void encode(codeword *word, int32_t message); -static void hadamard(expandedCodeword *src, expandedCodeword *dst); -static void expand_and_sum(expandedCodeword *dest, codeword src[]); -static int32_t find_peaks(expandedCodeword *transform); +static void encode(uint32_t *word, const uint8_t message); +static void hadamard(uint16_t src[128], uint16_t dst[128]); +static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]); +static uint8_t find_peaks(const uint16_t transform[128]); @@ -54,10 +40,10 @@ static int32_t find_peaks(expandedCodeword *transform); * @param[out] word An RM(1,7) codeword * @param[in] message 
A message */ -static void encode(codeword *word, int32_t message) { +static void encode(uint32_t *word, uint8_t message) { // the four parts of the word are identical // except for encoding bits 5 and 6 - int32_t first_word; + uint32_t first_word; // bit 7 flips all the bits, do that first to save work first_word = BIT0MASK(message >> 7); // bits 0, 1, 2, 3, 4 are the same for all four longs @@ -68,14 +54,14 @@ static void encode(codeword *word, int32_t message) { first_word ^= BIT0MASK(message >> 3) & 0xff00ff00; first_word ^= BIT0MASK(message >> 4) & 0xffff0000; // we can store this in the first quarter - word->u32[0] = first_word; + word[0] = first_word; // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 first_word ^= BIT0MASK(message >> 5); - word->u32[1] = first_word; + word[1] = first_word; first_word ^= BIT0MASK(message >> 6); - word->u32[3] = first_word; + word[3] = first_word; first_word ^= BIT0MASK(message >> 5); - word->u32[2] = first_word; + word[2] = first_word; } @@ -111,19 +97,20 @@ static void encode(codeword *word, int32_t message) { * @param[out] src Structure that contain the expanded codeword * @param[out] dst Structure that contain the expanded codeword */ -static void hadamard(expandedCodeword *src, expandedCodeword *dst) { +static void hadamard(uint16_t src[128], uint16_t dst[128]) { // the passes move data: // src -> dst -> src -> dst -> src -> dst -> src -> dst // using p1 and p2 alternately - expandedCodeword *p1 = src; - expandedCodeword *p2 = dst; - for (int32_t pass = 0 ; pass < 7 ; pass++) { - for (int32_t i = 0 ; i < 64 ; i++) { - (*p2)[i] = (*p1)[2 * i] + (*p1)[2 * i + 1]; - (*p2)[i + 64] = (*p1)[2 * i] - (*p1)[2 * i + 1]; + uint16_t *p1 = src; + uint16_t *p2 = dst; + uint16_t *p3; + for (uint32_t pass = 0 ; pass < 7 ; pass++) { + for (uint32_t i = 0 ; i < 64 ; i++) { + p2[i] = p1[2 * i] + p1[2 * i + 1]; + p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; } // swap p1, p2 for next round - expandedCodeword *p3 = p1; + p3 = p1; p1 = p2; p2 = 
p3; } @@ -144,18 +131,18 @@ static void hadamard(expandedCodeword *src, expandedCodeword *dst) { * @param[out] dest Structure that contain the expanded codeword * @param[in] src Structure that contain the codeword */ -static void expand_and_sum(expandedCodeword *dest, codeword src[]) { +static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]) { // start with the first copy - for (int32_t part = 0 ; part < 4 ; part++) { - for (int32_t bit = 0 ; bit < 32 ; bit++) { - (*dest)[part * 32 + bit] = src[0].u32[part] >> bit & 1; + for (uint32_t part = 0 ; part < 4 ; part++) { + for (uint32_t bit = 0 ; bit < 32 ; bit++) { + dest[part * 32 + bit] = (uint16_t) ((src[part] >> bit) & 1); } } // sum the rest of the copies - for (int32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { - for (int32_t part = 0 ; part < 4 ; part++) { - for (int32_t bit = 0 ; bit < 32 ; bit++) { - (*dest)[part * 32 + bit] += src[copy].u32[part] >> bit & 1; + for (uint32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { + for (uint32_t part = 0 ; part < 4 ; part++) { + for (uint32_t bit = 0 ; bit < 32 ; bit++) { + dest[part * 32 + bit] += (uint16_t) ((src[4 * copy + part] >> bit) & 1); } } } @@ -172,27 +159,26 @@ static void expand_and_sum(expandedCodeword *dest, codeword src[]) { * in the lowest 7 bits it taken * @param[in] transform Structure that contain the expanded codeword */ -static int32_t find_peaks(expandedCodeword *transform) { - int32_t peak_abs_value = 0; - int32_t peak_value = 0; - int32_t peak_pos = 0; - for (int32_t i = 0 ; i < 128 ; i++) { - // get absolute value - int32_t t = (*transform)[i]; - int32_t pos_mask = -(t > 0); - int32_t absolute = (pos_mask & t) | (~pos_mask & -t); - // all compilers nowadays compile with a conditional move - peak_value = absolute > peak_abs_value ? t : peak_value; - peak_pos = absolute > peak_abs_value ? i : peak_pos; - peak_abs_value = absolute > peak_abs_value ? 
absolute : peak_abs_value; +static uint8_t find_peaks(const uint16_t transform[128]) { + uint16_t peak_abs = 0; + uint16_t peak = 0; + uint16_t pos = 0; + uint16_t t, abs, mask; + for (uint16_t i = 0 ; i < 128 ; i++) { + t = transform[i]; + abs = t ^ ((-(t >> 15)) & (t ^ -t)); // abs = abs(t) + mask = -(((uint16_t)(peak_abs - abs)) >> 15); + peak ^= mask & (peak ^ t); + pos ^= mask & (pos ^ i); + peak_abs ^= mask & (peak_abs ^ abs); } - // set bit 7 - peak_pos |= 128 * (peak_value > 0); - return peak_pos; + pos |= 128 & ((peak >> 15) - 1); + return (uint8_t) pos; } + /** * @brief Encodes the received word * @@ -204,15 +190,13 @@ static int32_t find_peaks(expandedCodeword *transform) { */ void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { uint8_t *message_array = (uint8_t *) msg; - codeword *codeArray = (codeword *) cdw; + uint32_t *codeArray = (uint32_t *) cdw; for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { - // fill entries i * MULTIPLICITY to (i+1) * MULTIPLICITY - int32_t pos = i * MULTIPLICITY; // encode first word - encode(&codeArray[pos], message_array[i]); + encode(&codeArray[4 * i * MULTIPLICITY], message_array[i]); // copy to other identical codewords for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { - memcpy(&codeArray[pos + copy], &codeArray[pos], sizeof(codeword)); + memcpy(&codeArray[4 * i * MULTIPLICITY + 4 * copy], &codeArray[4 * i * MULTIPLICITY], 4 * sizeof(uint32_t)); } } } @@ -230,17 +214,17 @@ void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t * */ void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint64_t *msg, const uint64_t *cdw) { uint8_t *message_array = (uint8_t *) msg; - codeword *codeArray = (codeword *) cdw; - expandedCodeword expanded; + uint32_t *codeArray = (uint32_t *) cdw; + uint16_t expanded[128]; + uint16_t transform[128]; for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { // collect the codewords - expand_and_sum(&expanded, &codeArray[i * MULTIPLICITY]); + 
expand_and_sum(expanded, &codeArray[4 * i * MULTIPLICITY]); // apply hadamard transform - expandedCodeword transform; - hadamard(&expanded, &transform); + hadamard(expanded, transform); // fix the first entry to get the half Hadamard transform transform[0] -= 64 * MULTIPLICITY; // finish the decoding - message_array[i] = find_peaks(&transform); + message_array[i] = find_peaks(transform); } } diff --git a/crypto_kem/hqc-rmrs-192/clean/reed_muller.c b/crypto_kem/hqc-rmrs-192/clean/reed_muller.c index 014be214..5beb05e3 100644 --- a/crypto_kem/hqc-rmrs-192/clean/reed_muller.c +++ b/crypto_kem/hqc-rmrs-192/clean/reed_muller.c @@ -7,33 +7,19 @@ * Constant time implementation of Reed-Muller code RM(1,7) */ -// setting this will help the compiler with auto vectorization -#undef ALIGNVECTORS - // number of repeated code words #define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) -// codeword is 128 bits, seen multiple ways -typedef union { - uint8_t u8[16]; - uint32_t u32[4]; -} codeword -; - -// Expanded codeword has a short for every bit, for internal calculations -typedef int16_t expandedCodeword[128] -; - // copy bit 0 into all bits of a 32 bit value -#define BIT0MASK(x) (int32_t)(-((x) & 1)) +#define BIT0MASK(x) (-((x) & 1)) -static void encode(codeword *word, int32_t message); -static void hadamard(expandedCodeword *src, expandedCodeword *dst); -static void expand_and_sum(expandedCodeword *dest, codeword src[]); -static int32_t find_peaks(expandedCodeword *transform); +static void encode(uint32_t *word, const uint8_t message); +static void hadamard(uint16_t src[128], uint16_t dst[128]); +static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]); +static uint8_t find_peaks(const uint16_t transform[128]); @@ -54,10 +40,10 @@ static int32_t find_peaks(expandedCodeword *transform); * @param[out] word An RM(1,7) codeword * @param[in] message A message */ -static void encode(codeword *word, int32_t message) { +static void encode(uint32_t *word, uint8_t 
message) { // the four parts of the word are identical // except for encoding bits 5 and 6 - int32_t first_word; + uint32_t first_word; // bit 7 flips all the bits, do that first to save work first_word = BIT0MASK(message >> 7); // bits 0, 1, 2, 3, 4 are the same for all four longs @@ -68,14 +54,14 @@ static void encode(codeword *word, int32_t message) { first_word ^= BIT0MASK(message >> 3) & 0xff00ff00; first_word ^= BIT0MASK(message >> 4) & 0xffff0000; // we can store this in the first quarter - word->u32[0] = first_word; + word[0] = first_word; // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 first_word ^= BIT0MASK(message >> 5); - word->u32[1] = first_word; + word[1] = first_word; first_word ^= BIT0MASK(message >> 6); - word->u32[3] = first_word; + word[3] = first_word; first_word ^= BIT0MASK(message >> 5); - word->u32[2] = first_word; + word[2] = first_word; } @@ -111,19 +97,20 @@ static void encode(codeword *word, int32_t message) { * @param[out] src Structure that contain the expanded codeword * @param[out] dst Structure that contain the expanded codeword */ -static void hadamard(expandedCodeword *src, expandedCodeword *dst) { +static void hadamard(uint16_t src[128], uint16_t dst[128]) { // the passes move data: // src -> dst -> src -> dst -> src -> dst -> src -> dst // using p1 and p2 alternately - expandedCodeword *p1 = src; - expandedCodeword *p2 = dst; - for (int32_t pass = 0 ; pass < 7 ; pass++) { - for (int32_t i = 0 ; i < 64 ; i++) { - (*p2)[i] = (*p1)[2 * i] + (*p1)[2 * i + 1]; - (*p2)[i + 64] = (*p1)[2 * i] - (*p1)[2 * i + 1]; + uint16_t *p1 = src; + uint16_t *p2 = dst; + uint16_t *p3; + for (uint32_t pass = 0 ; pass < 7 ; pass++) { + for (uint32_t i = 0 ; i < 64 ; i++) { + p2[i] = p1[2 * i] + p1[2 * i + 1]; + p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; } // swap p1, p2 for next round - expandedCodeword *p3 = p1; + p3 = p1; p1 = p2; p2 = p3; } @@ -144,18 +131,18 @@ static void hadamard(expandedCodeword *src, expandedCodeword *dst) { * @param[out] 
dest Structure that contain the expanded codeword * @param[in] src Structure that contain the codeword */ -static void expand_and_sum(expandedCodeword *dest, codeword src[]) { +static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]) { // start with the first copy - for (int32_t part = 0 ; part < 4 ; part++) { - for (int32_t bit = 0 ; bit < 32 ; bit++) { - (*dest)[part * 32 + bit] = src[0].u32[part] >> bit & 1; + for (uint32_t part = 0 ; part < 4 ; part++) { + for (uint32_t bit = 0 ; bit < 32 ; bit++) { + dest[part * 32 + bit] = (uint16_t) ((src[part] >> bit) & 1); } } // sum the rest of the copies - for (int32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { - for (int32_t part = 0 ; part < 4 ; part++) { - for (int32_t bit = 0 ; bit < 32 ; bit++) { - (*dest)[part * 32 + bit] += src[copy].u32[part] >> bit & 1; + for (uint32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { + for (uint32_t part = 0 ; part < 4 ; part++) { + for (uint32_t bit = 0 ; bit < 32 ; bit++) { + dest[part * 32 + bit] += (uint16_t) ((src[4 * copy + part] >> bit) & 1); } } } @@ -172,27 +159,26 @@ static void expand_and_sum(expandedCodeword *dest, codeword src[]) { * in the lowest 7 bits it taken * @param[in] transform Structure that contain the expanded codeword */ -static int32_t find_peaks(expandedCodeword *transform) { - int32_t peak_abs_value = 0; - int32_t peak_value = 0; - int32_t peak_pos = 0; - for (int32_t i = 0 ; i < 128 ; i++) { - // get absolute value - int32_t t = (*transform)[i]; - int32_t pos_mask = -(t > 0); - int32_t absolute = (pos_mask & t) | (~pos_mask & -t); - // all compilers nowadays compile with a conditional move - peak_value = absolute > peak_abs_value ? t : peak_value; - peak_pos = absolute > peak_abs_value ? i : peak_pos; - peak_abs_value = absolute > peak_abs_value ? 
absolute : peak_abs_value; +static uint8_t find_peaks(const uint16_t transform[128]) { + uint16_t peak_abs = 0; + uint16_t peak = 0; + uint16_t pos = 0; + uint16_t t, abs, mask; + for (uint16_t i = 0 ; i < 128 ; i++) { + t = transform[i]; + abs = t ^ ((-(t >> 15)) & (t ^ -t)); // abs = abs(t) + mask = -(((uint16_t)(peak_abs - abs)) >> 15); + peak ^= mask & (peak ^ t); + pos ^= mask & (pos ^ i); + peak_abs ^= mask & (peak_abs ^ abs); } - // set bit 7 - peak_pos |= 128 * (peak_value > 0); - return peak_pos; + pos |= 128 & ((peak >> 15) - 1); + return (uint8_t) pos; } + /** * @brief Encodes the received word * @@ -204,15 +190,13 @@ static int32_t find_peaks(expandedCodeword *transform) { */ void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { uint8_t *message_array = (uint8_t *) msg; - codeword *codeArray = (codeword *) cdw; + uint32_t *codeArray = (uint32_t *) cdw; for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { - // fill entries i * MULTIPLICITY to (i+1) * MULTIPLICITY - int32_t pos = i * MULTIPLICITY; // encode first word - encode(&codeArray[pos], message_array[i]); + encode(&codeArray[4 * i * MULTIPLICITY], message_array[i]); // copy to other identical codewords for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { - memcpy(&codeArray[pos + copy], &codeArray[pos], sizeof(codeword)); + memcpy(&codeArray[4 * i * MULTIPLICITY + 4 * copy], &codeArray[4 * i * MULTIPLICITY], 4 * sizeof(uint32_t)); } } } @@ -230,17 +214,17 @@ void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t * */ void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint64_t *msg, const uint64_t *cdw) { uint8_t *message_array = (uint8_t *) msg; - codeword *codeArray = (codeword *) cdw; - expandedCodeword expanded; + uint32_t *codeArray = (uint32_t *) cdw; + uint16_t expanded[128]; + uint16_t transform[128]; for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { // collect the codewords - expand_and_sum(&expanded, &codeArray[i * MULTIPLICITY]); + 
expand_and_sum(expanded, &codeArray[4 * i * MULTIPLICITY]); // apply hadamard transform - expandedCodeword transform; - hadamard(&expanded, &transform); + hadamard(expanded, transform); // fix the first entry to get the half Hadamard transform transform[0] -= 64 * MULTIPLICITY; // finish the decoding - message_array[i] = find_peaks(&transform); + message_array[i] = find_peaks(transform); } } diff --git a/crypto_kem/hqc-rmrs-256/clean/reed_muller.c b/crypto_kem/hqc-rmrs-256/clean/reed_muller.c index d8cfde2e..ab3b66fa 100644 --- a/crypto_kem/hqc-rmrs-256/clean/reed_muller.c +++ b/crypto_kem/hqc-rmrs-256/clean/reed_muller.c @@ -7,33 +7,19 @@ * Constant time implementation of Reed-Muller code RM(1,7) */ -// setting this will help the compiler with auto vectorization -#undef ALIGNVECTORS - // number of repeated code words #define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128) -// codeword is 128 bits, seen multiple ways -typedef union { - uint8_t u8[16]; - uint32_t u32[4]; -} codeword -; - -// Expanded codeword has a short for every bit, for internal calculations -typedef int16_t expandedCodeword[128] -; - // copy bit 0 into all bits of a 32 bit value -#define BIT0MASK(x) (int32_t)(-((x) & 1)) +#define BIT0MASK(x) (-((x) & 1)) -static void encode(codeword *word, int32_t message); -static void hadamard(expandedCodeword *src, expandedCodeword *dst); -static void expand_and_sum(expandedCodeword *dest, codeword src[]); -static int32_t find_peaks(expandedCodeword *transform); +static void encode(uint32_t *word, const uint8_t message); +static void hadamard(uint16_t src[128], uint16_t dst[128]); +static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]); +static uint8_t find_peaks(const uint16_t transform[128]); @@ -54,10 +40,10 @@ static int32_t find_peaks(expandedCodeword *transform); * @param[out] word An RM(1,7) codeword * @param[in] message A message */ -static void encode(codeword *word, int32_t message) { +static void encode(uint32_t *word, uint8_t 
message) { // the four parts of the word are identical // except for encoding bits 5 and 6 - int32_t first_word; + uint32_t first_word; // bit 7 flips all the bits, do that first to save work first_word = BIT0MASK(message >> 7); // bits 0, 1, 2, 3, 4 are the same for all four longs @@ -68,14 +54,14 @@ static void encode(codeword *word, int32_t message) { first_word ^= BIT0MASK(message >> 3) & 0xff00ff00; first_word ^= BIT0MASK(message >> 4) & 0xffff0000; // we can store this in the first quarter - word->u32[0] = first_word; + word[0] = first_word; // bit 5 flips entries 1 and 3; bit 6 flips 2 and 3 first_word ^= BIT0MASK(message >> 5); - word->u32[1] = first_word; + word[1] = first_word; first_word ^= BIT0MASK(message >> 6); - word->u32[3] = first_word; + word[3] = first_word; first_word ^= BIT0MASK(message >> 5); - word->u32[2] = first_word; + word[2] = first_word; } @@ -111,19 +97,20 @@ static void encode(codeword *word, int32_t message) { * @param[out] src Structure that contain the expanded codeword * @param[out] dst Structure that contain the expanded codeword */ -static void hadamard(expandedCodeword *src, expandedCodeword *dst) { +static void hadamard(uint16_t src[128], uint16_t dst[128]) { // the passes move data: // src -> dst -> src -> dst -> src -> dst -> src -> dst // using p1 and p2 alternately - expandedCodeword *p1 = src; - expandedCodeword *p2 = dst; - for (int32_t pass = 0 ; pass < 7 ; pass++) { - for (int32_t i = 0 ; i < 64 ; i++) { - (*p2)[i] = (*p1)[2 * i] + (*p1)[2 * i + 1]; - (*p2)[i + 64] = (*p1)[2 * i] - (*p1)[2 * i + 1]; + uint16_t *p1 = src; + uint16_t *p2 = dst; + uint16_t *p3; + for (uint32_t pass = 0 ; pass < 7 ; pass++) { + for (uint32_t i = 0 ; i < 64 ; i++) { + p2[i] = p1[2 * i] + p1[2 * i + 1]; + p2[i + 64] = p1[2 * i] - p1[2 * i + 1]; } // swap p1, p2 for next round - expandedCodeword *p3 = p1; + p3 = p1; p1 = p2; p2 = p3; } @@ -144,18 +131,18 @@ static void hadamard(expandedCodeword *src, expandedCodeword *dst) { * @param[out] 
dest Structure that contain the expanded codeword * @param[in] src Structure that contain the codeword */ -static void expand_and_sum(expandedCodeword *dest, codeword src[]) { +static void expand_and_sum(uint16_t dest[128], const uint32_t src[4 * MULTIPLICITY]) { // start with the first copy - for (int32_t part = 0 ; part < 4 ; part++) { - for (int32_t bit = 0 ; bit < 32 ; bit++) { - (*dest)[part * 32 + bit] = src[0].u32[part] >> bit & 1; + for (uint32_t part = 0 ; part < 4 ; part++) { + for (uint32_t bit = 0 ; bit < 32 ; bit++) { + dest[part * 32 + bit] = (uint16_t) ((src[part] >> bit) & 1); } } // sum the rest of the copies - for (int32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { - for (int32_t part = 0 ; part < 4 ; part++) { - for (int32_t bit = 0 ; bit < 32 ; bit++) { - (*dest)[part * 32 + bit] += src[copy].u32[part] >> bit & 1; + for (uint32_t copy = 1 ; copy < MULTIPLICITY ; copy++) { + for (uint32_t part = 0 ; part < 4 ; part++) { + for (uint32_t bit = 0 ; bit < 32 ; bit++) { + dest[part * 32 + bit] += (uint16_t) ((src[4 * copy + part] >> bit) & 1); } } } @@ -172,27 +159,26 @@ static void expand_and_sum(expandedCodeword *dest, codeword src[]) { * in the lowest 7 bits it taken * @param[in] transform Structure that contain the expanded codeword */ -static int32_t find_peaks(expandedCodeword *transform) { - int32_t peak_abs_value = 0; - int32_t peak_value = 0; - int32_t peak_pos = 0; - for (int32_t i = 0 ; i < 128 ; i++) { - // get absolute value - int32_t t = (*transform)[i]; - int32_t pos_mask = -(t > 0); - int32_t absolute = (pos_mask & t) | (~pos_mask & -t); - // all compilers nowadays compile with a conditional move - peak_value = absolute > peak_abs_value ? t : peak_value; - peak_pos = absolute > peak_abs_value ? i : peak_pos; - peak_abs_value = absolute > peak_abs_value ? 
absolute : peak_abs_value; +static uint8_t find_peaks(const uint16_t transform[128]) { + uint16_t peak_abs = 0; + uint16_t peak = 0; + uint16_t pos = 0; + uint16_t t, abs, mask; + for (uint16_t i = 0 ; i < 128 ; i++) { + t = transform[i]; + abs = t ^ ((-(t >> 15)) & (t ^ -t)); // abs = abs(t) + mask = -(((uint16_t)(peak_abs - abs)) >> 15); + peak ^= mask & (peak ^ t); + pos ^= mask & (pos ^ i); + peak_abs ^= mask & (peak_abs ^ abs); } - // set bit 7 - peak_pos |= 128 * (peak_value > 0); - return peak_pos; + pos |= 128 & ((peak >> 15) - 1); + return (uint8_t) pos; } + /** * @brief Encodes the received word * @@ -204,15 +190,13 @@ static int32_t find_peaks(expandedCodeword *transform) { */ void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t *msg) { uint8_t *message_array = (uint8_t *) msg; - codeword *codeArray = (codeword *) cdw; + uint32_t *codeArray = (uint32_t *) cdw; for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { - // fill entries i * MULTIPLICITY to (i+1) * MULTIPLICITY - int32_t pos = i * MULTIPLICITY; // encode first word - encode(&codeArray[pos], message_array[i]); + encode(&codeArray[4 * i * MULTIPLICITY], message_array[i]); // copy to other identical codewords for (size_t copy = 1 ; copy < MULTIPLICITY ; copy++) { - memcpy(&codeArray[pos + copy], &codeArray[pos], sizeof(codeword)); + memcpy(&codeArray[4 * i * MULTIPLICITY + 4 * copy], &codeArray[4 * i * MULTIPLICITY], 4 * sizeof(uint32_t)); } } } @@ -230,17 +214,17 @@ void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_encode(uint64_t *cdw, const uint64_t * */ void PQCLEAN_HQCRMRS256_CLEAN_reed_muller_decode(uint64_t *msg, const uint64_t *cdw) { uint8_t *message_array = (uint8_t *) msg; - codeword *codeArray = (codeword *) cdw; - expandedCodeword expanded; + uint32_t *codeArray = (uint32_t *) cdw; + uint16_t expanded[128]; + uint16_t transform[128]; for (size_t i = 0 ; i < VEC_N1_SIZE_BYTES ; i++) { // collect the codewords - expand_and_sum(&expanded, &codeArray[i * MULTIPLICITY]); + 
expand_and_sum(expanded, &codeArray[4 * i * MULTIPLICITY]); // apply hadamard transform - expandedCodeword transform; - hadamard(&expanded, &transform); + hadamard(expanded, transform); // fix the first entry to get the half Hadamard transform transform[0] -= 64 * MULTIPLICITY; // finish the decoding - message_array[i] = find_peaks(&transform); + message_array[i] = find_peaks(transform); } }