350 lines
12 KiB
C
350 lines
12 KiB
C
/// @file parallel_matrix_op.c
|
|
/// @brief the standard implementations for functions in parallel_matrix_op.h
|
|
///
|
|
/// the standard implementations for functions in parallel_matrix_op.h
|
|
///
|
|
|
|
#include "blas_comm.h"
|
|
#include "blas.h"
|
|
|
|
#include "parallel_matrix_op.h"
|
|
|
|
|
|
//////////////// Section: triangle matrix <-> rectangle matrix ///////////////////////////////////
|
|
|
|
|
|
/// Fold a square batched matrix into upper-triangular storage.
///
/// bA is read as Awidth x Awidth entries of size_batch bytes each, row by row.
/// For every row i:
///  - each entry left of the diagonal, (i, j) with j < i, is combined via
///    gf256v_add into the slot of btriC selected by idx_of_trimat(j, i, Awidth);
///  - the run of entries from the diagonal to the end of the row is combined via
///    gf256v_add into the next (Awidth - i) consecutive slots of btriC.
///
/// NOTE(review): gf256v_add is defined outside this file; it presumably
/// accumulates into its first argument, so btriC must already hold the desired
/// starting values (e.g. zero) on entry -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_UpperTrianglize( unsigned char *btriC, const unsigned char *bA, unsigned Awidth, unsigned size_batch ) {
    const unsigned dim = Awidth;
    unsigned char *row_out = btriC;

    for (unsigned row = 0; row < dim; row++) {
        /// number of entries from the diagonal to the end of this row
        const unsigned tail_len = dim - row;

        /// strictly-lower entries are folded onto their mirrored upper slots
        for (unsigned col = 0; col < row; col++) {
            const unsigned slot = idx_of_trimat(col, row, dim);
            gf256v_add( btriC + slot * size_batch, bA + size_batch * (row * Awidth + col), size_batch );
        }

        /// diagonal-and-right part of the row goes to the running output position
        gf256v_add( row_out, bA + size_batch * (row * Awidth + row), size_batch * tail_len );
        row_out += size_batch * tail_len;
    }
}
|
|
|
|
|
|
|
|
|
|
///////////////// Section: matrix multiplications ///////////////////////////////
|
|
|
|
|
|
|
|
/// Batched multiply-accumulate with an upper-triangular left operand (gf16 helpers).
///
/// bC is walked as Bheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [i, Bheight) issues
///     gf16v_madd( slot, entry (k - i) of row i of btriA, element k of column j of B, size_batch ).
/// btriA holds only entries with column >= row: row i has (Bheight - i) entries
/// of size_batch bytes, and rows are stored back to back.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf16v_get_ele.
///
/// NOTE(review): gf16v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_trimat_madd_gf16( unsigned char *bC, const unsigned char *btriA,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned dim = Bheight;
    unsigned char *out = bC;
    const unsigned char *tri_row = btriA;

    for (unsigned i = 0; i < dim; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            /// entries with k < i are not stored in the triangle, so start at the diagonal
            for (unsigned k = i; k < dim; k++) {
                gf16v_madd( out, &tri_row[(k - i) * size_batch], gf16v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
        /// row i of the triangle occupies (dim - i) entries
        tri_row += (dim - i) * size_batch;
    }
}
|
|
|
|
/// Batched multiply-accumulate with an upper-triangular left operand (gf256 helpers).
///
/// bC is walked as Bheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [i, Bheight) issues
///     gf256v_madd( slot, entry (k - i) of row i of btriA, element k of column j of B, size_batch ).
/// btriA holds only entries with column >= row: row i has (Bheight - i) entries
/// of size_batch bytes, and rows are stored back to back.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf256v_get_ele.
///
/// NOTE(review): gf256v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_trimat_madd_gf256( unsigned char *bC, const unsigned char *btriA,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned dim = Bheight;
    unsigned char *out = bC;
    const unsigned char *tri_row = btriA;

    for (unsigned i = 0; i < dim; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            /// entries with k < i are not stored in the triangle, so start at the diagonal
            for (unsigned k = i; k < dim; k++) {
                gf256v_madd( out, &tri_row[(k - i) * size_batch], gf256v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
        /// row i of the triangle occupies (dim - i) entries
        tri_row += (dim - i) * size_batch;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Batched multiply-accumulate using the transposed view of an upper-triangular
/// operand (gf16 helpers).
///
/// bC is walked as Bheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, i] issues
///     gf16v_madd( slot, btriA entry at idx_of_trimat(k, i, Bheight), element k of column j of B, size_batch ).
/// Only index pairs with k <= i are used, i.e. the stored triangle is addressed
/// with (k, i) rather than (i, k).
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf16v_get_ele.
///
/// NOTE(review): gf16v_madd and idx_of_trimat are defined outside this file --
/// confirm their exact semantics against blas.h / blas_comm.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_trimatTr_madd_gf16( unsigned char *bC, const unsigned char *btriA,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned dim = Bheight;
    unsigned char *out = bC;

    for (unsigned i = 0; i < dim; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            /// i < dim here, so k <= i also keeps k inside [0, Bheight)
            for (unsigned k = 0; k <= i; k++) {
                gf16v_madd( out, &btriA[size_batch * (idx_of_trimat(k, i, dim))], gf16v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
    }
}
|
|
|
|
/// Batched multiply-accumulate using the transposed view of an upper-triangular
/// operand (gf256 helpers).
///
/// bC is walked as Bheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, i] issues
///     gf256v_madd( slot, btriA entry at idx_of_trimat(k, i, Bheight), element k of column j of B, size_batch ).
/// Only index pairs with k <= i are used, i.e. the stored triangle is addressed
/// with (k, i) rather than (i, k).
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf256v_get_ele.
///
/// NOTE(review): gf256v_madd and idx_of_trimat are defined outside this file --
/// confirm their exact semantics against blas.h / blas_comm.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_trimatTr_madd_gf256( unsigned char *bC, const unsigned char *btriA,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned dim = Bheight;
    unsigned char *out = bC;

    for (unsigned i = 0; i < dim; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            /// i < dim here, so k <= i also keeps k inside [0, Bheight)
            for (unsigned k = 0; k <= i; k++) {
                gf256v_madd( out, &btriA[size_batch * (idx_of_trimat(k, i, dim))], gf256v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// Batched multiply-accumulate over the off-diagonal entries of a triangular
/// operand addressed through idx_of_2trimat (gf16 helpers).
///
/// bC is walked as Bheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, Bheight) with k != i issues
///     gf16v_madd( slot, btriA entry at idx_of_2trimat(i, k, Bheight), element k of column j of B, size_batch ).
/// The diagonal term (k == i) is never touched.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf16v_get_ele.
///
/// NOTE(review): idx_of_2trimat is defined outside this file; presumably it maps
/// (i, k) and (k, i) to the same stored entry -- confirm against blas_comm.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_2trimat_madd_gf16( unsigned char *bC, const unsigned char *btriA,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned dim = Bheight;
    unsigned char *out = bC;

    for (unsigned i = 0; i < dim; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            for (unsigned k = 0; k < dim; k++) {
                /// skip the diagonal contribution
                if (k != i) {
                    gf16v_madd( out, &btriA[size_batch * (idx_of_2trimat(i, k, dim))], gf16v_get_ele( b_col, k ), size_batch );
                }
            }
            out += size_batch;
        }
    }
}
|
|
|
|
/// Batched multiply-accumulate over the off-diagonal entries of a triangular
/// operand addressed through idx_of_2trimat (gf256 helpers).
///
/// bC is walked as Bheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, Bheight) with k != i issues
///     gf256v_madd( slot, btriA entry at idx_of_2trimat(i, k, Bheight), element k of column j of B, size_batch ).
/// The diagonal term (k == i) is never touched.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf256v_get_ele.
///
/// NOTE(review): idx_of_2trimat is defined outside this file; presumably it maps
/// (i, k) and (k, i) to the same stored entry -- confirm against blas_comm.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_2trimat_madd_gf256( unsigned char *bC, const unsigned char *btriA,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned dim = Bheight;
    unsigned char *out = bC;

    for (unsigned i = 0; i < dim; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            for (unsigned k = 0; k < dim; k++) {
                /// skip the diagonal contribution
                if (k != i) {
                    gf256v_madd( out, &btriA[size_batch * (idx_of_2trimat(i, k, dim))], gf256v_get_ele( b_col, k ), size_batch );
                }
            }
            out += size_batch;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// Multiply-accumulate of a transposed plain matrix with a batched matrix (gf16 helpers).
///
/// bC is walked as Awidth output rows, each size_batch * Bwidth bytes long.
/// For output row i, every j in [0, Aheight) issues
///     gf16v_madd( row i of bC, row j of bB, element j of vector i of A_to_tr, size_batch * Bwidth ).
/// Vector i of A_to_tr starts at A_to_tr + size_Acolvec * i and is read with
/// gf16v_get_ele; row j of bB starts at bB + j * Bwidth * size_batch.
///
/// NOTE(review): gf16v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_matTr_madd_gf16( unsigned char *bC, const unsigned char *A_to_tr, unsigned Aheight, unsigned size_Acolvec, unsigned Awidth,
        const unsigned char *bB, unsigned Bwidth, unsigned size_batch ) {
    /// bytes in one full row of bB (and of bC)
    const unsigned row_bytes = size_batch * Bwidth;
    unsigned char *out_row = bC;

    for (unsigned i = 0; i < Awidth; i++) {
        const unsigned char *a_vec = &A_to_tr[size_Acolvec * i];
        for (unsigned j = 0; j < Aheight; j++) {
            gf16v_madd( out_row, &bB[j * row_bytes], gf16v_get_ele( a_vec, j ), row_bytes );
        }
        out_row += row_bytes;
    }
}
|
|
|
|
/// Multiply-accumulate of a transposed plain matrix with a batched matrix (gf256 helpers).
///
/// bC is walked as Awidth output rows, each size_batch * Bwidth bytes long.
/// For output row i, every j in [0, Aheight) issues
///     gf256v_madd( row i of bC, row j of bB, element j of vector i of A_to_tr, size_batch * Bwidth ).
/// Vector i of A_to_tr starts at A_to_tr + size_Acolvec * i and is read with
/// gf256v_get_ele; row j of bB starts at bB + j * Bwidth * size_batch.
///
/// NOTE(review): gf256v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_matTr_madd_gf256( unsigned char *bC, const unsigned char *A_to_tr, unsigned Aheight, unsigned size_Acolvec, unsigned Awidth,
        const unsigned char *bB, unsigned Bwidth, unsigned size_batch ) {
    /// bytes in one full row of bB (and of bC)
    const unsigned row_bytes = size_batch * Bwidth;
    unsigned char *out_row = bC;

    for (unsigned i = 0; i < Awidth; i++) {
        const unsigned char *a_vec = &A_to_tr[size_Acolvec * i];
        for (unsigned j = 0; j < Aheight; j++) {
            gf256v_madd( out_row, &bB[j * row_bytes], gf256v_get_ele( a_vec, j ), row_bytes );
        }
        out_row += row_bytes;
    }
}
|
|
|
|
|
|
|
|
|
|
/// Multiply-accumulate of a transposed batched matrix with a plain matrix (gf16 helpers).
///
/// bC is walked as Awidth_before_tr x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, Bheight) issues
///     gf16v_madd( slot, bA_to_tr entry number (i + k * Awidth_before_tr), element k of column j of B, size_batch ).
/// The entry index (i + k * Awidth_before_tr) reads the stored matrix with row
/// and column roles swapped, which is how the transpose is applied.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf16v_get_ele.
///
/// NOTE(review): gf16v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_bmatTr_madd_gf16( unsigned char *bC, const unsigned char *bA_to_tr, unsigned Awidth_before_tr,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned out_rows = Awidth_before_tr;
    unsigned char *out = bC;

    for (unsigned i = 0; i < out_rows; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            for (unsigned k = 0; k < Bheight; k++) {
                gf16v_madd( out, &bA_to_tr[size_batch * (i + k * out_rows)], gf16v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
    }
}
|
|
|
|
/// Multiply-accumulate of a transposed batched matrix with a plain matrix (gf256 helpers).
///
/// bC is walked as Awidth_before_tr x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, Bheight) issues
///     gf256v_madd( slot, bA_to_tr entry number (i + k * Awidth_before_tr), element k of column j of B, size_batch ).
/// The entry index (i + k * Awidth_before_tr) reads the stored matrix with row
/// and column roles swapped, which is how the transpose is applied.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf256v_get_ele.
///
/// NOTE(review): gf256v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_bmatTr_madd_gf256( unsigned char *bC, const unsigned char *bA_to_tr, unsigned Awidth_before_tr,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    const unsigned out_rows = Awidth_before_tr;
    unsigned char *out = bC;

    for (unsigned i = 0; i < out_rows; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            for (unsigned k = 0; k < Bheight; k++) {
                gf256v_madd( out, &bA_to_tr[size_batch * (i + k * out_rows)], gf256v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Multiply-accumulate of a batched matrix with a plain matrix (gf16 helpers).
///
/// bC is walked as Aheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, Bheight) issues
///     gf16v_madd( slot, entry k of row i of bA, element k of column j of B, size_batch ).
/// bA is read as Aheight rows of Bheight entries, size_batch bytes per entry.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf16v_get_ele.
///
/// NOTE(review): gf16v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_mat_madd_gf16( unsigned char *bC, const unsigned char *bA, unsigned Aheight,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    /// bytes in one row of bA: its width equals Bheight
    const unsigned a_row_bytes = Bheight * size_batch;
    unsigned char *out = bC;
    const unsigned char *a_row = bA;

    for (unsigned i = 0; i < Aheight; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            for (unsigned k = 0; k < Bheight; k++) {
                gf16v_madd( out, &a_row[k * size_batch], gf16v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
        a_row += a_row_bytes;
    }
}
|
|
|
|
/// Multiply-accumulate of a batched matrix with a plain matrix (gf256 helpers).
///
/// bC is walked as Aheight x Bwidth slots of size_batch bytes, row by row.
/// For slot (i, j), every k in [0, Bheight) issues
///     gf256v_madd( slot, entry k of row i of bA, element k of column j of B, size_batch ).
/// bA is read as Aheight rows of Bheight entries, size_batch bytes per entry.
/// Column j of B starts at B + j * size_Bcolvec; elements are read with gf256v_get_ele.
///
/// NOTE(review): gf256v_madd is defined outside this file; presumably it adds
/// (vector * scalar) into its first argument -- confirm against blas.h.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_mat_madd_gf256( unsigned char *bC, const unsigned char *bA, unsigned Aheight,
        const unsigned char *B, unsigned Bheight, unsigned size_Bcolvec, unsigned Bwidth, unsigned size_batch ) {
    /// bytes in one row of bA: its width equals Bheight
    const unsigned a_row_bytes = Bheight * size_batch;
    unsigned char *out = bC;
    const unsigned char *a_row = bA;

    for (unsigned i = 0; i < Aheight; i++) {
        for (unsigned j = 0; j < Bwidth; j++) {
            const unsigned char *b_col = &B[j * size_Bcolvec];
            for (unsigned k = 0; k < Bheight; k++) {
                gf256v_madd( out, &a_row[k * size_batch], gf256v_get_ele( b_col, k ), size_batch );
            }
            out += size_batch;
        }
        a_row += a_row_bytes;
    }
}
|
|
|
|
|
|
|
|
|
|
//////////////////// Section: "quadratic" matrix evaluation ///////////////////////////////
|
|
|
|
|
|
|
|
|
|
/// Batched quadratic evaluation against an upper-triangular matrix (gf16 helpers).
///
/// The dim elements of x are first unpacked with gf16v_get_ele. y (size_batch
/// bytes) is zeroed, then for each row i a scratch accumulator is zeroed and
/// fed, for j = i .. dim-1, with gf16v_madd( acc, next trimat entry, x[j] );
/// the accumulator is finally folded into y with gf16v_madd( y, acc, x[i] ).
/// trimat is consumed sequentially, size_batch bytes per entry, rows holding
/// (dim - i) entries each.
///
/// Caller contract (unchecked, from the original disabled asserts):
///     dim <= 128 and size_batch <= 128.
/// Both scratch buffers are 256 bytes; larger dim or size_batch would overrun them.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_quad_trimat_eval_gf16( unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned dim, unsigned size_batch ) {
    unsigned char x_elems[256];
    unsigned char row_acc[256];
    const unsigned char *entry = trimat;

    /// unpack the packed input vector once so the inner loop can index bytes
    for (unsigned idx = 0; idx < dim; idx++) {
        x_elems[idx] = gf16v_get_ele( x, idx );
    }

    PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( y, size_batch );
    for (unsigned row = 0; row < dim; row++) {
        PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( row_acc, size_batch );
        for (unsigned col = row; col < dim; col++, entry += size_batch) {
            gf16v_madd( row_acc, entry, x_elems[col], size_batch );
        }
        gf16v_madd( y, row_acc, x_elems[row], size_batch );
    }
}
|
|
|
|
/// Batched quadratic evaluation against an upper-triangular matrix (gf256 helpers).
///
/// The dim elements of x are first unpacked with gf256v_get_ele. y (size_batch
/// bytes) is zeroed, then for each row i a scratch accumulator is zeroed and
/// fed, for j = i .. dim-1, with gf256v_madd( acc, next trimat entry, x[j] );
/// the accumulator is finally folded into y with gf256v_madd( y, acc, x[i] ).
/// trimat is consumed sequentially, size_batch bytes per entry, rows holding
/// (dim - i) entries each.
///
/// Caller contract (unchecked, from the original disabled asserts):
///     dim <= 256 and size_batch <= 256.
/// Both scratch buffers are 256 bytes; larger dim or size_batch would overrun them.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_quad_trimat_eval_gf256( unsigned char *y, const unsigned char *trimat, const unsigned char *x, unsigned dim, unsigned size_batch ) {
    unsigned char x_elems[256];
    unsigned char row_acc[256];
    const unsigned char *entry = trimat;

    /// copy the input elements once so the inner loop can index bytes directly
    for (unsigned idx = 0; idx < dim; idx++) {
        x_elems[idx] = gf256v_get_ele( x, idx );
    }

    PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( y, size_batch );
    for (unsigned row = 0; row < dim; row++) {
        PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( row_acc, size_batch );
        for (unsigned col = row; col < dim; col++, entry += size_batch) {
            gf256v_madd( row_acc, entry, x_elems[col], size_batch );
        }
        gf256v_madd( y, row_acc, x_elems[row], size_batch );
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Batched bilinear evaluation against a rectangular matrix (gf16 helpers).
///
/// The dim_x elements of x and the dim_y elements of y are first unpacked with
/// gf16v_get_ele. z (size_batch bytes) is zeroed, then for each row i in
/// [0, dim_y) a scratch accumulator is zeroed and fed, for j in [0, dim_x),
/// with gf16v_madd( acc, next mat entry, x[j] ); the accumulator is finally
/// folded into z with gf16v_madd( z, acc, y[i] ).
/// mat is consumed sequentially: dim_y rows of dim_x entries, size_batch bytes each.
///
/// Caller contract (unchecked, from the original disabled asserts):
///     dim_x <= 128, dim_y <= 128 and size_batch <= 128.
/// All three scratch buffers are 128 bytes; larger values would overrun them.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_quad_recmat_eval_gf16( unsigned char *z, const unsigned char *y, unsigned dim_y, const unsigned char *mat,
        const unsigned char *x, unsigned dim_x, unsigned size_batch ) {
    unsigned char x_elems[128];
    unsigned char y_elems[128];
    unsigned char row_acc[128];
    const unsigned char *entry = mat;

    /// unpack both packed input vectors once
    for (unsigned idx = 0; idx < dim_x; idx++) {
        x_elems[idx] = gf16v_get_ele( x, idx );
    }
    for (unsigned idx = 0; idx < dim_y; idx++) {
        y_elems[idx] = gf16v_get_ele( y, idx );
    }

    PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( z, size_batch );
    for (unsigned row = 0; row < dim_y; row++) {
        PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( row_acc, size_batch );
        for (unsigned col = 0; col < dim_x; col++, entry += size_batch) {
            gf16v_madd( row_acc, entry, x_elems[col], size_batch );
        }
        gf16v_madd( z, row_acc, y_elems[row], size_batch );
    }
}
|
|
|
|
|
|
/// Batched bilinear evaluation against a rectangular matrix (gf256 helpers).
///
/// The dim_x elements of x and the dim_y elements of y are first unpacked with
/// gf256v_get_ele. z (size_batch bytes) is zeroed, then for each row i in
/// [0, dim_y) a scratch accumulator is zeroed and fed, for j in [0, dim_x),
/// with gf256v_madd( acc, next mat entry, x[j] ); the accumulator is finally
/// folded into z with gf256v_madd( z, acc, y[i] ).
/// mat is consumed sequentially: dim_y rows of dim_x entries, size_batch bytes each.
///
/// Caller contract (unchecked, from the original disabled asserts):
///     dim_x <= 128, dim_y <= 128 and size_batch <= 128.
/// All three scratch buffers are 128 bytes; larger values would overrun them.
void PQCLEAN_RAINBOWIACLASSIC_CLEAN_batch_quad_recmat_eval_gf256( unsigned char *z, const unsigned char *y, unsigned dim_y, const unsigned char *mat,
        const unsigned char *x, unsigned dim_x, unsigned size_batch ) {
    unsigned char x_elems[128];
    unsigned char y_elems[128];
    unsigned char row_acc[128];
    const unsigned char *entry = mat;

    /// copy both input vectors once so the loops can index bytes directly
    for (unsigned idx = 0; idx < dim_x; idx++) {
        x_elems[idx] = gf256v_get_ele( x, idx );
    }
    for (unsigned idx = 0; idx < dim_y; idx++) {
        y_elems[idx] = gf256v_get_ele( y, idx );
    }

    PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( z, size_batch );
    for (unsigned row = 0; row < dim_y; row++) {
        PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_set_zero( row_acc, size_batch );
        for (unsigned col = 0; col < dim_x; col++, entry += size_batch) {
            gf256v_madd( row_acc, entry, x_elems[col], size_batch );
        }
        gf256v_madd( z, row_acc, y_elems[row], size_batch );
    }
}
|
|
|
|
|
|
|