|
|
@@ -251,69 +251,4 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], |
|
|
|
OPENSSL_cleanse(storage,sizeof(storage)); |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
* See crypto/bn/rsaz-x86_64.pl for further details. |
|
|
|
*/ |
|
|
|
/*
 * Prototypes for the 512-bit helper routines called by RSAZ_512_mod_exp.
 * Their bodies are not part of this section of the file; presumably they
 * are the assembly routines generated from rsaz-x86_64.pl (TODO confirm).
 *
 * In every prototype |n| is passed the modulus and |k| the k0 value by
 * RSAZ_512_mod_exp; |tbl| is the 16-entry scatter/gather table and |power|
 * selects an entry in it.
 */

/* ret = a * b, reduced using n and k. */
void rsaz_512_mul(void *ret,
	const void *a,
	const void *b,
	const void *n,
	BN_ULONG k);

/*
 * Multiplies |ret| style operand by |a| and stores the product into entry
 * |power| of |tbl| (NOTE(review): exact operand roles live in the
 * implementation -- confirm there).
 */
void rsaz_512_mul_scatter4(void *ret,
	const void *a,
	const void *n,
	BN_ULONG k,
	const void *tbl,
	unsigned int power);

/* ret = a * tbl[power], reduced using n and k. */
void rsaz_512_mul_gather4(void *ret,
	const void *a,
	const void *tbl,
	const void *n,
	BN_ULONG k,
	unsigned int power);

/* Used by the caller as the final "from Montgomery" conversion step. */
void rsaz_512_mul_by_one(void *ret,
	const void *a,
	const void *n,
	BN_ULONG k);

/* Squaring; |cnt| is presumably the number of successive squarings. */
void rsaz_512_sqr(void *ret,
	const void *a,
	const void *n,
	BN_ULONG k,
	int cnt);

/* Stores |val| into entry |power| of |tbl|. */
void rsaz_512_scatter4(void *tbl,
	const BN_ULONG *val,
	int power);

/* Loads entry |power| of |tbl| into |val|. */
void rsaz_512_gather4(BN_ULONG *val,
	const void *tbl,
	int power);
|
|
|
|
|
|
|
/*
 * RSAZ_512_mod_exp writes base^exponent mod m into |result|, operating on
 * 8-limb (512-bit) values with a fixed 4-bit window.
 *
 *   result   - output, 8 limbs
 *   base     - base, 8 limbs
 *   exponent - exponent, 8 limbs; read byte-wise from byte 63 down to byte 0
 *   m        - modulus, 8 limbs
 *   k0       - forwarded unchanged to every rsaz_512_* multiply/square call
 *   RR       - multiplied with |base| to bring it into the working domain
 *              (presumably R^2 mod m for Montgomery form -- confirm)
 *
 * All intermediate values live in one stack buffer that is wiped with
 * OPENSSL_cleanse before returning.
 */
void RSAZ_512_mod_exp(BN_ULONG result[8],
	const BN_ULONG base[8], const BN_ULONG exponent[8],
	const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
{
	/*
	 * Layout of |storage|:
	 *   [0, 16*8*8)            the 16-entry power table
	 *   [16*8*8, +8*8)         a_inv (converted base)
	 *   [16*8*8 + 8*8, +8*8)   acc   (running accumulator / scratch)
	 */
	alignas(64) uint8_t storage[(16*8*8) + (64 * 2)]; /* 1.2KB */
	unsigned char *const table = storage;
	BN_ULONG *const a_inv = (BN_ULONG *)(table + 16*8*8);
	BN_ULONG *const acc = (BN_ULONG *)(table + 16*8*8 + 8*8);
	const uint8_t *const exp_bytes = (const uint8_t *)exponent;
	unsigned int window;
	int i;

	/*
	 * table[0] = 1_inv: the low limb is negated and the remaining limbs are
	 * complemented, i.e. the multi-limb two's-complement negation of m
	 * whenever m[0] is non-zero.
	 */
	acc[0] = 0 - m[0];
	for (i = 1; i < 8; i++)
		acc[i] = ~m[i];
	rsaz_512_scatter4(table, acc, 0);

	/* table[1] = a_inv^1 */
	rsaz_512_mul(a_inv, base, RR, m, k0);
	rsaz_512_scatter4(table, a_inv, 1);

	/* table[2] = a_inv^2 */
	rsaz_512_sqr(acc, a_inv, m, k0, 1);
	rsaz_512_scatter4(table, acc, 2);

	/* table[3..15]: each call multiplies by a_inv and stores the entry. */
	i = 3;
	while (i < 16) {
		rsaz_512_mul_scatter4(acc, a_inv, m, k0, table, i);
		i++;
	}

	/*
	 * First window: the top byte's high nibble seeds the accumulator via a
	 * plain gather; its low nibble is folded in after four squarings.
	 */
	window = exp_bytes[63];
	rsaz_512_gather4(acc, table, window >> 4);
	rsaz_512_sqr(acc, acc, m, k0, 4);
	rsaz_512_mul_gather4(acc, acc, table, m, k0, window & 0xf);

	/* Remaining bytes 62..0: high nibble first, then low nibble. */
	for (i = 63; i-- > 0; ) {
		window = exp_bytes[i];

		rsaz_512_sqr(acc, acc, m, k0, 4);
		rsaz_512_mul_gather4(acc, acc, table, m, k0, window >> 4);

		rsaz_512_sqr(acc, acc, m, k0, 4);
		rsaz_512_mul_gather4(acc, acc, table, m, k0, window & 0x0f);
	}

	/* from Montgomery */
	rsaz_512_mul_by_one(result, acc, m, k0);

	/* Scrub the table and scratch values off the stack. */
	OPENSSL_cleanse(storage, sizeof(storage));
}
|
|
|
|
|
|
|
#endif /* OPENSSL_X86_64 */ |