pqc/crypto_sign/dilithium2aes/avx2/shuffle.inc
John Schanck 32c613e8ec Round 3 update for Dilithium (from github source) (#369)
* Update Dilithium

* Alternative montgomery reduce to avoid i386 functest errors

* Explicit casts for msvc

* More casts; bump upstream version; fix metadata

* another cast
2021-03-24 21:02:50 +00:00

26 lines
615 B
PHP

.macro shuffle8 r0,r1,r2,r3
vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2
vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3
.endm
.macro shuffle4 r0,r1,r2,r3
vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2
vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3
.endm
.macro shuffle2 r0,r1,r2,r3
#vpsllq $32,%ymm\r1,%ymm\r2
vmovsldup %ymm\r1,%ymm\r2
vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2
vpsrlq $32,%ymm\r0,%ymm\r0
#vmovshdup %ymm\r0,%ymm\r0
vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
.endm
.macro shuffle1 r0,r1,r2,r3
vpslld $16,%ymm\r1,%ymm\r2
vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2
vpsrld $16,%ymm\r0,%ymm\r0
vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
.endm