Merge pull request #319 from jschanck/ntru
NTRU: inline the one call that needed @plt
This commit is contained in:
commit
a631583fe2
@ -23,9 +23,9 @@ auxiliary-submitters:
|
|||||||
- Zhenfei Zhang
|
- Zhenfei Zhang
|
||||||
implementations:
|
implementations:
|
||||||
- name: clean
|
- name: clean
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 reference implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation
|
||||||
- name: avx2
|
- name: avx2
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 avx2 implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 avx2 implementation
|
||||||
supported_platforms:
|
supported_platforms:
|
||||||
- architecture: x86_64
|
- architecture: x86_64
|
||||||
operating_systems:
|
operating_systems:
|
||||||
|
@ -23,9 +23,9 @@ auxiliary-submitters:
|
|||||||
- Zhenfei Zhang
|
- Zhenfei Zhang
|
||||||
implementations:
|
implementations:
|
||||||
- name: clean
|
- name: clean
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 reference implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation
|
||||||
- name: avx2
|
- name: avx2
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 avx2 implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 avx2 implementation
|
||||||
supported_platforms:
|
supported_platforms:
|
||||||
- architecture: x86_64
|
- architecture: x86_64
|
||||||
operating_systems:
|
operating_systems:
|
||||||
|
@ -23,9 +23,9 @@ auxiliary-submitters:
|
|||||||
- Zhenfei Zhang
|
- Zhenfei Zhang
|
||||||
implementations:
|
implementations:
|
||||||
- name: clean
|
- name: clean
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 reference implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation
|
||||||
- name: avx2
|
- name: avx2
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 avx2 implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 avx2 implementation
|
||||||
supported_platforms:
|
supported_platforms:
|
||||||
- architecture: x86_64
|
- architecture: x86_64
|
||||||
operating_systems:
|
operating_systems:
|
||||||
|
@ -23,9 +23,9 @@ auxiliary-submitters:
|
|||||||
- Zhenfei Zhang
|
- Zhenfei Zhang
|
||||||
implementations:
|
implementations:
|
||||||
- name: clean
|
- name: clean
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 reference implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation
|
||||||
- name: avx2
|
- name: avx2
|
||||||
version: https://github.com/jschanck/ntru/tree/b43afe59 avx2 implementation
|
version: https://github.com/jschanck/ntru/tree/ff3c84e1 avx2 implementation
|
||||||
supported_platforms:
|
supported_platforms:
|
||||||
- architecture: x86_64
|
- architecture: x86_64
|
||||||
operating_systems:
|
operating_systems:
|
||||||
|
@ -4,7 +4,7 @@ LIB=libntruhrss701_avx2.a
|
|||||||
HEADERS=api.h cmov.h owcpa.h params.h poly.h poly_r2_inv.h sample.h
|
HEADERS=api.h cmov.h owcpa.h params.h poly.h poly_r2_inv.h sample.h
|
||||||
OBJECTS=cmov.o kem.o owcpa.o pack3.o packq.o poly.o poly_r2_inv.o sample.o sample_iid.o \
|
OBJECTS=cmov.o kem.o owcpa.o pack3.o packq.o poly.o poly_r2_inv.o sample.o sample_iid.o \
|
||||||
square_1_701_patience.o square_3_701_patience.o square_6_701_patience.o square_12_701_shufbytes.o square_15_701_shufbytes.o square_27_701_shufbytes.o square_42_701_shufbytes.o square_84_701_shufbytes.o square_168_701_shufbytes.o square_336_701_shufbytes.o \
|
square_1_701_patience.o square_3_701_patience.o square_6_701_patience.o square_12_701_shufbytes.o square_15_701_shufbytes.o square_27_701_shufbytes.o square_42_701_shufbytes.o square_84_701_shufbytes.o square_168_701_shufbytes.o square_336_701_shufbytes.o \
|
||||||
poly_mod_3_Phi_n.o poly_mod_q_Phi_n.o poly_r2_mul.o poly_rq_mul.o poly_rq_mul_x_minus_1.o poly_rq_to_s3.o poly_s3_inv.o poly_s3_to_rq.o vec32_sample_iid.o
|
poly_lift.o poly_mod_3_Phi_n.o poly_mod_q_Phi_n.o poly_r2_mul.o poly_rq_mul.o poly_rq_to_s3.o poly_s3_inv.o vec32_sample_iid.o
|
||||||
|
|
||||||
CFLAGS=-O3 -mavx2 -mbmi2 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS)
|
CFLAGS=-O3 -mavx2 -mbmi2 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS)
|
||||||
|
|
||||||
|
@ -288,6 +288,90 @@ mask_n:
|
|||||||
.word 0
|
.word 0
|
||||||
.word 0
|
.word 0
|
||||||
.word 0
|
.word 0
|
||||||
|
mask_mod8192:
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
mask_mod8192_omit_lowest:
|
||||||
|
.word 0
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
.word 8191
|
||||||
|
mask_mod8192_only_lowest:
|
||||||
|
.word 8191
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
.word 0
|
||||||
|
shuf_5_to_0_zerorest:
|
||||||
|
.byte 10
|
||||||
|
.byte 11
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
|
.byte 255
|
||||||
.text
|
.text
|
||||||
.global PQCLEAN_NTRUHRSS701_AVX2_poly_lift
|
.global PQCLEAN_NTRUHRSS701_AVX2_poly_lift
|
||||||
.global _PQCLEAN_NTRUHRSS701_AVX2_poly_lift
|
.global _PQCLEAN_NTRUHRSS701_AVX2_poly_lift
|
||||||
@ -3112,7 +3196,190 @@ vpand const_modq(%rip), %ymm2, %ymm2
|
|||||||
vpand const_1s(%rip), %ymm3, %ymm3
|
vpand const_1s(%rip), %ymm3, %ymm3
|
||||||
vpor %ymm3, %ymm2, %ymm3
|
vpor %ymm3, %ymm2, %ymm3
|
||||||
vmovdqa %ymm3, 1376(%rsp)
|
vmovdqa %ymm3, 1376(%rsp)
|
||||||
mov %rsp, %rsi
|
vmovdqu 1374(%rsp), %ymm0
|
||||||
call PQCLEAN_NTRUHRSS701_AVX2_poly_Rq_mul_x_minus_1@plt
|
vpsubw 1376(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1376(%rdi)
|
||||||
|
vextracti128 $1, %ymm0, %xmm4
|
||||||
|
vpshufb shuf_5_to_0_zerorest(%rip), %ymm4, %ymm4
|
||||||
|
vpsubw 0(%rsp), %ymm4, %ymm4
|
||||||
|
vpand mask_mod8192_only_lowest(%rip), %ymm4, %ymm4
|
||||||
|
vmovdqu 1342(%rsp), %ymm0
|
||||||
|
vpsubw 1344(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1344(%rdi)
|
||||||
|
vmovdqu 1310(%rsp), %ymm0
|
||||||
|
vpsubw 1312(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1312(%rdi)
|
||||||
|
vmovdqu 1278(%rsp), %ymm0
|
||||||
|
vpsubw 1280(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1280(%rdi)
|
||||||
|
vmovdqu 1246(%rsp), %ymm0
|
||||||
|
vpsubw 1248(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1248(%rdi)
|
||||||
|
vmovdqu 1214(%rsp), %ymm0
|
||||||
|
vpsubw 1216(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1216(%rdi)
|
||||||
|
vmovdqu 1182(%rsp), %ymm0
|
||||||
|
vpsubw 1184(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1184(%rdi)
|
||||||
|
vmovdqu 1150(%rsp), %ymm0
|
||||||
|
vpsubw 1152(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1152(%rdi)
|
||||||
|
vmovdqu 1118(%rsp), %ymm0
|
||||||
|
vpsubw 1120(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1120(%rdi)
|
||||||
|
vmovdqu 1086(%rsp), %ymm0
|
||||||
|
vpsubw 1088(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1088(%rdi)
|
||||||
|
vmovdqu 1054(%rsp), %ymm0
|
||||||
|
vpsubw 1056(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1056(%rdi)
|
||||||
|
vmovdqu 1022(%rsp), %ymm0
|
||||||
|
vpsubw 1024(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 1024(%rdi)
|
||||||
|
vmovdqu 990(%rsp), %ymm0
|
||||||
|
vpsubw 992(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 992(%rdi)
|
||||||
|
vmovdqu 958(%rsp), %ymm0
|
||||||
|
vpsubw 960(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 960(%rdi)
|
||||||
|
vmovdqu 926(%rsp), %ymm0
|
||||||
|
vpsubw 928(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 928(%rdi)
|
||||||
|
vmovdqu 894(%rsp), %ymm0
|
||||||
|
vpsubw 896(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 896(%rdi)
|
||||||
|
vmovdqu 862(%rsp), %ymm0
|
||||||
|
vpsubw 864(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 864(%rdi)
|
||||||
|
vmovdqu 830(%rsp), %ymm0
|
||||||
|
vpsubw 832(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 832(%rdi)
|
||||||
|
vmovdqu 798(%rsp), %ymm0
|
||||||
|
vpsubw 800(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 800(%rdi)
|
||||||
|
vmovdqu 766(%rsp), %ymm0
|
||||||
|
vpsubw 768(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 768(%rdi)
|
||||||
|
vmovdqu 734(%rsp), %ymm0
|
||||||
|
vpsubw 736(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 736(%rdi)
|
||||||
|
vmovdqu 702(%rsp), %ymm0
|
||||||
|
vpsubw 704(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 704(%rdi)
|
||||||
|
vmovdqu 670(%rsp), %ymm0
|
||||||
|
vpsubw 672(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 672(%rdi)
|
||||||
|
vmovdqu 638(%rsp), %ymm0
|
||||||
|
vpsubw 640(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 640(%rdi)
|
||||||
|
vmovdqu 606(%rsp), %ymm0
|
||||||
|
vpsubw 608(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 608(%rdi)
|
||||||
|
vmovdqu 574(%rsp), %ymm0
|
||||||
|
vpsubw 576(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 576(%rdi)
|
||||||
|
vmovdqu 542(%rsp), %ymm0
|
||||||
|
vpsubw 544(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 544(%rdi)
|
||||||
|
vmovdqu 510(%rsp), %ymm0
|
||||||
|
vpsubw 512(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 512(%rdi)
|
||||||
|
vmovdqu 478(%rsp), %ymm0
|
||||||
|
vpsubw 480(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 480(%rdi)
|
||||||
|
vmovdqu 446(%rsp), %ymm0
|
||||||
|
vpsubw 448(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 448(%rdi)
|
||||||
|
vmovdqu 414(%rsp), %ymm0
|
||||||
|
vpsubw 416(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 416(%rdi)
|
||||||
|
vmovdqu 382(%rsp), %ymm0
|
||||||
|
vpsubw 384(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 384(%rdi)
|
||||||
|
vmovdqu 350(%rsp), %ymm0
|
||||||
|
vpsubw 352(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 352(%rdi)
|
||||||
|
vmovdqu 318(%rsp), %ymm0
|
||||||
|
vpsubw 320(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 320(%rdi)
|
||||||
|
vmovdqu 286(%rsp), %ymm0
|
||||||
|
vpsubw 288(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 288(%rdi)
|
||||||
|
vmovdqu 254(%rsp), %ymm0
|
||||||
|
vpsubw 256(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 256(%rdi)
|
||||||
|
vmovdqu 222(%rsp), %ymm0
|
||||||
|
vpsubw 224(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 224(%rdi)
|
||||||
|
vmovdqu 190(%rsp), %ymm0
|
||||||
|
vpsubw 192(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 192(%rdi)
|
||||||
|
vmovdqu 158(%rsp), %ymm0
|
||||||
|
vpsubw 160(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 160(%rdi)
|
||||||
|
vmovdqu 126(%rsp), %ymm0
|
||||||
|
vpsubw 128(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 128(%rdi)
|
||||||
|
vmovdqu 94(%rsp), %ymm0
|
||||||
|
vpsubw 96(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 96(%rdi)
|
||||||
|
vmovdqu 62(%rsp), %ymm0
|
||||||
|
vpsubw 64(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 64(%rdi)
|
||||||
|
vmovdqu 30(%rsp), %ymm0
|
||||||
|
vpsubw 32(%rsp), %ymm0, %ymm1
|
||||||
|
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
||||||
|
vmovdqa %ymm1, 32(%rdi)
|
||||||
|
vmovdqa 0(%rsp), %ymm3
|
||||||
|
vpsrlq $48, %ymm3, %ymm0
|
||||||
|
vpermq $147, %ymm0, %ymm0
|
||||||
|
vpsllq $16, %ymm3, %ymm2
|
||||||
|
vpxor %ymm0, %ymm2, %ymm2
|
||||||
|
vpsubw %ymm3, %ymm2, %ymm3
|
||||||
|
vpand mask_mod8192_omit_lowest(%rip), %ymm3, %ymm3
|
||||||
|
vpxor %ymm3, %ymm4, %ymm3
|
||||||
|
vmovdqa %ymm3, 0(%rdi)
|
||||||
mov %r8, %rsp
|
mov %r8, %rsp
|
||||||
ret
|
ret
|
@ -1,277 +0,0 @@
|
|||||||
.data
|
|
||||||
.p2align 5
|
|
||||||
mask_mod8192:
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
mask_mod8192_omit_lowest:
|
|
||||||
.word 0
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
.word 8191
|
|
||||||
mask_mod8192_only_lowest:
|
|
||||||
.word 8191
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
.word 0
|
|
||||||
shuf_5_to_0_zerorest:
|
|
||||||
.byte 10
|
|
||||||
.byte 11
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.byte 255
|
|
||||||
.text
|
|
||||||
.global PQCLEAN_NTRUHRSS701_AVX2_poly_Rq_mul_x_minus_1
|
|
||||||
.global _PQCLEAN_NTRUHRSS701_AVX2_poly_Rq_mul_x_minus_1
|
|
||||||
PQCLEAN_NTRUHRSS701_AVX2_poly_Rq_mul_x_minus_1:
|
|
||||||
_PQCLEAN_NTRUHRSS701_AVX2_poly_Rq_mul_x_minus_1:
|
|
||||||
vmovdqu 1374(%rsi), %ymm0
|
|
||||||
vpsubw 1376(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1376(%rdi)
|
|
||||||
vextracti128 $1, %ymm0, %xmm4
|
|
||||||
vpshufb shuf_5_to_0_zerorest(%rip), %ymm4, %ymm4
|
|
||||||
vpsubw 0(%rsi), %ymm4, %ymm4
|
|
||||||
vpand mask_mod8192_only_lowest(%rip), %ymm4, %ymm4
|
|
||||||
vmovdqu 1342(%rsi), %ymm0
|
|
||||||
vpsubw 1344(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1344(%rdi)
|
|
||||||
vmovdqu 1310(%rsi), %ymm0
|
|
||||||
vpsubw 1312(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1312(%rdi)
|
|
||||||
vmovdqu 1278(%rsi), %ymm0
|
|
||||||
vpsubw 1280(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1280(%rdi)
|
|
||||||
vmovdqu 1246(%rsi), %ymm0
|
|
||||||
vpsubw 1248(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1248(%rdi)
|
|
||||||
vmovdqu 1214(%rsi), %ymm0
|
|
||||||
vpsubw 1216(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1216(%rdi)
|
|
||||||
vmovdqu 1182(%rsi), %ymm0
|
|
||||||
vpsubw 1184(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1184(%rdi)
|
|
||||||
vmovdqu 1150(%rsi), %ymm0
|
|
||||||
vpsubw 1152(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1152(%rdi)
|
|
||||||
vmovdqu 1118(%rsi), %ymm0
|
|
||||||
vpsubw 1120(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1120(%rdi)
|
|
||||||
vmovdqu 1086(%rsi), %ymm0
|
|
||||||
vpsubw 1088(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1088(%rdi)
|
|
||||||
vmovdqu 1054(%rsi), %ymm0
|
|
||||||
vpsubw 1056(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1056(%rdi)
|
|
||||||
vmovdqu 1022(%rsi), %ymm0
|
|
||||||
vpsubw 1024(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 1024(%rdi)
|
|
||||||
vmovdqu 990(%rsi), %ymm0
|
|
||||||
vpsubw 992(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 992(%rdi)
|
|
||||||
vmovdqu 958(%rsi), %ymm0
|
|
||||||
vpsubw 960(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 960(%rdi)
|
|
||||||
vmovdqu 926(%rsi), %ymm0
|
|
||||||
vpsubw 928(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 928(%rdi)
|
|
||||||
vmovdqu 894(%rsi), %ymm0
|
|
||||||
vpsubw 896(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 896(%rdi)
|
|
||||||
vmovdqu 862(%rsi), %ymm0
|
|
||||||
vpsubw 864(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 864(%rdi)
|
|
||||||
vmovdqu 830(%rsi), %ymm0
|
|
||||||
vpsubw 832(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 832(%rdi)
|
|
||||||
vmovdqu 798(%rsi), %ymm0
|
|
||||||
vpsubw 800(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 800(%rdi)
|
|
||||||
vmovdqu 766(%rsi), %ymm0
|
|
||||||
vpsubw 768(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 768(%rdi)
|
|
||||||
vmovdqu 734(%rsi), %ymm0
|
|
||||||
vpsubw 736(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 736(%rdi)
|
|
||||||
vmovdqu 702(%rsi), %ymm0
|
|
||||||
vpsubw 704(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 704(%rdi)
|
|
||||||
vmovdqu 670(%rsi), %ymm0
|
|
||||||
vpsubw 672(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 672(%rdi)
|
|
||||||
vmovdqu 638(%rsi), %ymm0
|
|
||||||
vpsubw 640(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 640(%rdi)
|
|
||||||
vmovdqu 606(%rsi), %ymm0
|
|
||||||
vpsubw 608(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 608(%rdi)
|
|
||||||
vmovdqu 574(%rsi), %ymm0
|
|
||||||
vpsubw 576(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 576(%rdi)
|
|
||||||
vmovdqu 542(%rsi), %ymm0
|
|
||||||
vpsubw 544(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 544(%rdi)
|
|
||||||
vmovdqu 510(%rsi), %ymm0
|
|
||||||
vpsubw 512(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 512(%rdi)
|
|
||||||
vmovdqu 478(%rsi), %ymm0
|
|
||||||
vpsubw 480(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 480(%rdi)
|
|
||||||
vmovdqu 446(%rsi), %ymm0
|
|
||||||
vpsubw 448(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 448(%rdi)
|
|
||||||
vmovdqu 414(%rsi), %ymm0
|
|
||||||
vpsubw 416(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 416(%rdi)
|
|
||||||
vmovdqu 382(%rsi), %ymm0
|
|
||||||
vpsubw 384(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 384(%rdi)
|
|
||||||
vmovdqu 350(%rsi), %ymm0
|
|
||||||
vpsubw 352(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 352(%rdi)
|
|
||||||
vmovdqu 318(%rsi), %ymm0
|
|
||||||
vpsubw 320(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 320(%rdi)
|
|
||||||
vmovdqu 286(%rsi), %ymm0
|
|
||||||
vpsubw 288(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 288(%rdi)
|
|
||||||
vmovdqu 254(%rsi), %ymm0
|
|
||||||
vpsubw 256(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 256(%rdi)
|
|
||||||
vmovdqu 222(%rsi), %ymm0
|
|
||||||
vpsubw 224(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 224(%rdi)
|
|
||||||
vmovdqu 190(%rsi), %ymm0
|
|
||||||
vpsubw 192(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 192(%rdi)
|
|
||||||
vmovdqu 158(%rsi), %ymm0
|
|
||||||
vpsubw 160(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 160(%rdi)
|
|
||||||
vmovdqu 126(%rsi), %ymm0
|
|
||||||
vpsubw 128(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 128(%rdi)
|
|
||||||
vmovdqu 94(%rsi), %ymm0
|
|
||||||
vpsubw 96(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 96(%rdi)
|
|
||||||
vmovdqu 62(%rsi), %ymm0
|
|
||||||
vpsubw 64(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 64(%rdi)
|
|
||||||
vmovdqu 30(%rsi), %ymm0
|
|
||||||
vpsubw 32(%rsi), %ymm0, %ymm1
|
|
||||||
vpand mask_mod8192(%rip), %ymm1, %ymm1
|
|
||||||
vmovdqa %ymm1, 32(%rdi)
|
|
||||||
vmovdqa 0(%rsi), %ymm3
|
|
||||||
vpsrlq $48, %ymm3, %ymm0
|
|
||||||
vpermq $147, %ymm0, %ymm0
|
|
||||||
vpsllq $16, %ymm3, %ymm2
|
|
||||||
vpxor %ymm0, %ymm2, %ymm2
|
|
||||||
vpsubw %ymm3, %ymm2, %ymm3
|
|
||||||
vpand mask_mod8192_omit_lowest(%rip), %ymm3, %ymm3
|
|
||||||
vpxor %ymm3, %ymm4, %ymm3
|
|
||||||
vmovdqa %ymm3, 0(%rdi)
|
|
||||||
ret
|
|
Loading…
Reference in New Issue
Block a user