From ef4895b55614f6926c384c8986d41d1e7eb6b2d8 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Tue, 6 Apr 2021 23:41:01 +0100
Subject: [PATCH 01/12] pull SIKE

---
 src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S | 1095 +++++++++++++++++
 .../sike/sike-p434-sha256/asm/fp_generic.c    |  179 +++
 src/kem/sike/sike-p434-sha256/fpx.c           |  282 +++++
 src/kem/sike/sike-p434-sha256/fpx.h           |  112 ++
 src/kem/sike/sike-p434-sha256/isogeny.c       |  262 ++++
 src/kem/sike/sike-p434-sha256/isogeny.h       |   49 +
 src/kem/sike/sike-p434-sha256/params.c        |  128 ++
 src/kem/sike/sike-p434-sha256/sike.c          |  517 ++++++++
 src/kem/sike/sike-p434-sha256/utils.h         |  231 ++++
 9 files changed, 2855 insertions(+)
 create mode 100644 src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S
 create mode 100644 src/kem/sike/sike-p434-sha256/asm/fp_generic.c
 create mode 100644 src/kem/sike/sike-p434-sha256/fpx.c
 create mode 100644 src/kem/sike/sike-p434-sha256/fpx.h
 create mode 100644 src/kem/sike/sike-p434-sha256/isogeny.c
 create mode 100644 src/kem/sike/sike-p434-sha256/isogeny.h
 create mode 100644 src/kem/sike/sike-p434-sha256/params.c
 create mode 100644 src/kem/sike/sike-p434-sha256/sike.c
 create mode 100644 src/kem/sike/sike-p434-sha256/utils.h

diff --git a/src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S b/src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S
new file mode 100644
index 00000000..4e2d7b74
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S
@@ -0,0 +1,1095 @@
+# This file is generated from a similarly-named Perl script in the BoringSSL
+# source tree. Do not edit by hand.
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+#endif
+
+#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+/* Emit an empty .note.GNU-stack section so ELF linkers do not treat this object as requiring an executable stack. */
+.section	.note.GNU-stack,"",@progbits
+.text	
+.Lp434x2:	/* 2*p434 (per the label), least significant limb first; the second stored word serves limbs 1 and 2 in the code below */
+.quad	0xFFFFFFFFFFFFFFFE
+.quad	0xFFFFFFFFFFFFFFFF
+.quad	0xFB82ECF5C5FFFFFF
+.quad	0xF78CB8F062B15D47
+.quad	0xD9F8BFAD038A40AC
+.quad	0x0004683E4E2EE688
+
+/* Four-limb multiplicand used by sike_fprdc; presumably the non-zero upper limbs of p434+1 -- TODO confirm against params.c. */
+.Lp434p1:
+.quad	0xFDC1767AE3000000
+.quad	0x7BC65C783158AEA3
+.quad	0x6CFC5FD681C52056
+.quad	0x0002341F27177344
+
+.globl	sike_fpadd
+.hidden sike_fpadd
+.type	sike_fpadd,@function
+sike_fpadd:
+.cfi_startproc	
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+	/* sike_fpadd: 7-limb modular addition. In: rdi = a, rsi = b, rdx = c (result). r12-r14 are saved above because they carry limbs 4-6. */
+	xorq	%rax,%rax
+	/* r8..r14 = a + b, a single carry chain across the seven limbs */
+	movq	0(%rdi),%r8
+	addq	0(%rsi),%r8
+	movq	8(%rdi),%r9
+	adcq	8(%rsi),%r9
+	movq	16(%rdi),%r10
+	adcq	16(%rsi),%r10
+	movq	24(%rdi),%r11
+	adcq	24(%rsi),%r11
+	movq	32(%rdi),%r12
+	adcq	32(%rsi),%r12
+	movq	40(%rdi),%r13
+	adcq	40(%rsi),%r13
+	movq	48(%rdi),%r14
+	adcq	48(%rsi),%r14
+	/* Subtract the .Lp434x2 constant; its second stored word is applied to both limb 1 and limb 2 */
+	movq	.Lp434x2(%rip),%rcx
+	subq	%rcx,%r8
+	movq	8+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r9
+	sbbq	%rcx,%r10
+	movq	16+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r11
+	movq	24+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r12
+	movq	32+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r13
+	movq	40+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r14
+	/* rax = 0 - borrow: all ones when the subtraction went below zero, otherwise zero */
+	sbbq	$0,%rax
+	/* Masked constant words for limbs 0-3 (all zero when no correction is needed) */
+	movq	.Lp434x2(%rip),%rdi
+	andq	%rax,%rdi
+	movq	8+.Lp434x2(%rip),%rsi
+	andq	%rax,%rsi
+	movq	16+.Lp434x2(%rip),%rcx
+	andq	%rax,%rcx
+	/* Add the masked constant back without branching; results are stored as they are produced */
+	addq	%rdi,%r8
+	movq	%r8,0(%rdx)
+	adcq	%rsi,%r9
+	movq	%r9,8(%rdx)
+	adcq	%rsi,%r10
+	movq	%r10,16(%rdx)
+	adcq	%rcx,%r11
+	movq	%r11,24(%rdx)
+	/* Park CF in cl: the andq instructions below overwrite the flags, and btq reloads CF from bit 0 of rcx */
+	setc	%cl
+	movq	24+.Lp434x2(%rip),%r8
+	andq	%rax,%r8
+	movq	32+.Lp434x2(%rip),%r9
+	andq	%rax,%r9
+	movq	40+.Lp434x2(%rip),%r10
+	andq	%rax,%r10
+	btq	$0,%rcx
+	/* Finish the carry chain for limbs 4-6 */
+	adcq	%r8,%r12
+	movq	%r12,32(%rdx)
+	adcq	%r9,%r13
+	movq	%r13,40(%rdx)
+	adcq	%r10,%r14
+	movq	%r14,48(%rdx)
+	/* Restore callee-saved registers; the .byte pair below is the encoding of "rep ret" */
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.globl	sike_cswap_asm
+.hidden sike_cswap_asm
+.type	sike_cswap_asm,@function
+sike_cswap_asm:
+/* sike_cswap_asm: swap-or-keep two buffers under a mask, without branching. In: rdi, rsi = buffers, rdx = 64-bit mask. */
+/* Processes 14 blocks of 16 bytes (offsets 0..208), i.e. 224 bytes of each buffer; only xmm0-xmm3 are used. */
+	movq	%rdx,%xmm3
+/* pshufd $68 selects dwords {0,1,0,1}: the low 64 bits of the mask are replicated into the upper half of xmm3 */
+
+
+
+
+	pshufd	$68,%xmm3,%xmm3
+/* Per block: t = (x ^ y) & mask; x ^= t; y ^= t -- exchanges bits where the mask is set, leaves the rest untouched */
+	movdqu	0(%rdi),%xmm0
+	movdqu	0(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm1,0(%rsi)
+
+	movdqu	16(%rdi),%xmm0
+	movdqu	16(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,16(%rdi)
+	movdqu	%xmm1,16(%rsi)
+
+	movdqu	32(%rdi),%xmm0
+	movdqu	32(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,32(%rdi)
+	movdqu	%xmm1,32(%rsi)
+
+	movdqu	48(%rdi),%xmm0
+	movdqu	48(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,48(%rdi)
+	movdqu	%xmm1,48(%rsi)
+
+	movdqu	64(%rdi),%xmm0
+	movdqu	64(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,64(%rdi)
+	movdqu	%xmm1,64(%rsi)
+
+	movdqu	80(%rdi),%xmm0
+	movdqu	80(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,80(%rdi)
+	movdqu	%xmm1,80(%rsi)
+
+	movdqu	96(%rdi),%xmm0
+	movdqu	96(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,96(%rdi)
+	movdqu	%xmm1,96(%rsi)
+
+	movdqu	112(%rdi),%xmm0
+	movdqu	112(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,112(%rdi)
+	movdqu	%xmm1,112(%rsi)
+
+	movdqu	128(%rdi),%xmm0
+	movdqu	128(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,128(%rdi)
+	movdqu	%xmm1,128(%rsi)
+
+	movdqu	144(%rdi),%xmm0
+	movdqu	144(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,144(%rdi)
+	movdqu	%xmm1,144(%rsi)
+
+	movdqu	160(%rdi),%xmm0
+	movdqu	160(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,160(%rdi)
+	movdqu	%xmm1,160(%rsi)
+
+	movdqu	176(%rdi),%xmm0
+	movdqu	176(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,176(%rdi)
+	movdqu	%xmm1,176(%rsi)
+
+	movdqu	192(%rdi),%xmm0
+	movdqu	192(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,192(%rdi)
+	movdqu	%xmm1,192(%rsi)
+
+	movdqu	208(%rdi),%xmm0
+	movdqu	208(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,208(%rdi)
+	movdqu	%xmm1,208(%rsi)
+
+	.byte	0xf3,0xc3
+.globl	sike_fpsub
+.hidden sike_fpsub
+.type	sike_fpsub,@function
+sike_fpsub:
+.cfi_startproc	
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+	/* sike_fpsub: 7-limb modular subtraction. In: rdi = a, rsi = b, rdx = c (result). */
+	xorq	%rax,%rax
+	/* r8..r14 = a - b, a single borrow chain across the seven limbs */
+	movq	0(%rdi),%r8
+	subq	0(%rsi),%r8
+	movq	8(%rdi),%r9
+	sbbq	8(%rsi),%r9
+	movq	16(%rdi),%r10
+	sbbq	16(%rsi),%r10
+	movq	24(%rdi),%r11
+	sbbq	24(%rsi),%r11
+	movq	32(%rdi),%r12
+	sbbq	32(%rsi),%r12
+	movq	40(%rdi),%r13
+	sbbq	40(%rsi),%r13
+	movq	48(%rdi),%r14
+	sbbq	48(%rsi),%r14
+	/* rax = 0 - borrow: all ones when a < b, otherwise zero */
+	sbbq	$0x0,%rax
+	/* Masked .Lp434x2 words for limbs 0-3; the second stored word serves limbs 1 and 2 */
+	movq	.Lp434x2(%rip),%rdi
+	andq	%rax,%rdi
+	movq	8+.Lp434x2(%rip),%rsi
+	andq	%rax,%rsi
+	movq	16+.Lp434x2(%rip),%rcx
+	andq	%rax,%rcx
+	/* Add the masked constant back without branching */
+	addq	%rdi,%r8
+	movq	%r8,0(%rdx)
+	adcq	%rsi,%r9
+	movq	%r9,8(%rdx)
+	adcq	%rsi,%r10
+	movq	%r10,16(%rdx)
+	adcq	%rcx,%r11
+	movq	%r11,24(%rdx)
+	/* Park CF in cl while andq overwrites the flags; btq reloads CF from bit 0 of rcx */
+	setc	%cl
+	movq	24+.Lp434x2(%rip),%r8
+	andq	%rax,%r8
+	movq	32+.Lp434x2(%rip),%r9
+	andq	%rax,%r9
+	movq	40+.Lp434x2(%rip),%r10
+	andq	%rax,%r10
+	btq	$0x0,%rcx
+	/* Finish the carry chain for limbs 4-6 */
+	adcq	%r8,%r12
+	adcq	%r9,%r13
+	adcq	%r10,%r14
+	movq	%r12,32(%rdx)
+	movq	%r13,40(%rdx)
+	movq	%r14,48(%rdx)
+
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.globl	sike_mpadd_asm
+.hidden sike_mpadd_asm
+.type	sike_mpadd_asm,@function
+sike_mpadd_asm:	/* c = a + b over 7 limbs, no reduction. In: rdi = a, rsi = b, rdx = c. Uses only r8-r11 and rcx. */
+.cfi_startproc	
+	movq	0(%rdi),%r8;
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%rcx
+	addq	0(%rsi),%r8
+	adcq	8(%rsi),%r9
+	adcq	16(%rsi),%r10
+	adcq	24(%rsi),%r11
+	adcq	32(%rsi),%rcx
+	movq	%r8,0(%rdx)
+	movq	%r9,8(%rdx)
+	movq	%r10,16(%rdx)
+	movq	%r11,24(%rdx)
+	movq	%rcx,32(%rdx)
+	/* Limbs 5 and 6: movq leaves the flags alone, so the carry from limb 4 is still live here. The final carry-out is not recorded. */
+	movq	40(%rdi),%r8
+	movq	48(%rdi),%r9
+	adcq	40(%rsi),%r8
+	adcq	48(%rsi),%r9
+	movq	%r8,40(%rdx)
+	movq	%r9,48(%rdx)
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.globl	sike_mpsubx2_asm
+.hidden sike_mpsubx2_asm
+.type	sike_mpsubx2_asm,@function
+sike_mpsubx2_asm:
+.cfi_startproc	
+	xorq	%rax,%rax
+	/* sike_mpsubx2_asm: c = a - b over 14 limbs. In: rdi = a, rsi = b, rdx = c. Leaves rax = 0 - final borrow (all ones if a < b, else 0). */
+	movq	0(%rdi),%r8
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%rcx
+	subq	0(%rsi),%r8
+	sbbq	8(%rsi),%r9
+	sbbq	16(%rsi),%r10
+	sbbq	24(%rsi),%r11
+	sbbq	32(%rsi),%rcx
+	movq	%r8,0(%rdx)
+	movq	%r9,8(%rdx)
+	movq	%r10,16(%rdx)
+	movq	%r11,24(%rdx)
+	movq	%rcx,32(%rdx)
+	/* Limbs 5-9: the borrow stays live in CF because movq does not modify flags */
+	movq	40(%rdi),%r8
+	movq	48(%rdi),%r9
+	movq	56(%rdi),%r10
+	movq	64(%rdi),%r11
+	movq	72(%rdi),%rcx
+	sbbq	40(%rsi),%r8
+	sbbq	48(%rsi),%r9
+	sbbq	56(%rsi),%r10
+	sbbq	64(%rsi),%r11
+	sbbq	72(%rsi),%rcx
+	movq	%r8,40(%rdx)
+	movq	%r9,48(%rdx)
+	movq	%r10,56(%rdx)
+	movq	%r11,64(%rdx)
+	movq	%rcx,72(%rdx)
+	/* Limbs 10-13, then fold the final borrow into rax */
+	movq	80(%rdi),%r8
+	movq	88(%rdi),%r9
+	movq	96(%rdi),%r10
+	movq	104(%rdi),%r11
+	sbbq	80(%rsi),%r8
+	sbbq	88(%rsi),%r9
+	sbbq	96(%rsi),%r10
+	sbbq	104(%rsi),%r11
+	sbbq	$0x0,%rax
+	movq	%r8,80(%rdx)
+	movq	%r9,88(%rdx)
+	movq	%r10,96(%rdx)
+	movq	%r11,104(%rdx)
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.globl	sike_mpdblsubx2_asm
+.hidden sike_mpdblsubx2_asm
+.type	sike_mpdblsubx2_asm,@function
+sike_mpdblsubx2_asm:
+.cfi_startproc	
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	/* sike_mpdblsubx2_asm: c = c - a - b over 14 limbs. In: rdi = a, rsi = b, rdx = c (read and written in place). */
+	xorq	%rax,%rax
+
+	/* Low 7 limbs: c - a; rax picks up the borrow out of limb 6 */
+	movq	0(%rdx),%r8
+	movq	8(%rdx),%r9
+	movq	16(%rdx),%r10
+	movq	24(%rdx),%r11
+	movq	32(%rdx),%r12
+	movq	40(%rdx),%r13
+	movq	48(%rdx),%rcx
+	subq	0(%rdi),%r8
+	sbbq	8(%rdi),%r9
+	sbbq	16(%rdi),%r10
+	sbbq	24(%rdi),%r11
+	sbbq	32(%rdi),%r12
+	sbbq	40(%rdi),%r13
+	sbbq	48(%rdi),%rcx
+	adcq	$0x0,%rax
+
+	/* ... minus b; rax now counts the borrows out of the low half (0, 1 or 2) */
+	subq	0(%rsi),%r8
+	sbbq	8(%rsi),%r9
+	sbbq	16(%rsi),%r10
+	sbbq	24(%rsi),%r11
+	sbbq	32(%rsi),%r12
+	sbbq	40(%rsi),%r13
+	sbbq	48(%rsi),%rcx
+	adcq	$0x0,%rax
+
+	/* Store the low half of the result */
+	movq	%r8,0(%rdx)
+	movq	%r9,8(%rdx)
+	movq	%r10,16(%rdx)
+	movq	%r11,24(%rdx)
+	movq	%r12,32(%rdx)
+	movq	%r13,40(%rdx)
+	movq	%rcx,48(%rdx)
+
+	/* High 7 limbs of c */
+	movq	56(%rdx),%r8
+	movq	64(%rdx),%r9
+	movq	72(%rdx),%r10
+	movq	80(%rdx),%r11
+	movq	88(%rdx),%r12
+	movq	96(%rdx),%r13
+	movq	104(%rdx),%rcx
+	/* NOTE(review): the borrow from "subq %rax,%r8" is consumed by the sbbq on the same limb, not the next; exact only while limb 7 of c >= rax -- confirm callers guarantee this. */
+	subq	%rax,%r8
+	sbbq	56(%rdi),%r8
+	sbbq	64(%rdi),%r9
+	sbbq	72(%rdi),%r10
+	sbbq	80(%rdi),%r11
+	sbbq	88(%rdi),%r12
+	sbbq	96(%rdi),%r13
+	sbbq	104(%rdi),%rcx
+
+	/* ... minus the high half of b; the final borrow is not recorded */
+	subq	56(%rsi),%r8
+	sbbq	64(%rsi),%r9
+	sbbq	72(%rsi),%r10
+	sbbq	80(%rsi),%r11
+	sbbq	88(%rsi),%r12
+	sbbq	96(%rsi),%r13
+	sbbq	104(%rsi),%rcx
+
+	/* Store the high half of the result */
+	movq	%r8,56(%rdx)
+	movq	%r9,64(%rdx)
+	movq	%r10,72(%rdx)
+	movq	%r11,80(%rdx)
+	movq	%r12,88(%rdx)
+	movq	%r13,96(%rdx)
+	movq	%rcx,104(%rdx)
+
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3
+.cfi_endproc	
+
+.globl	sike_fprdc
+.hidden sike_fprdc
+.type	sike_fprdc,@function
+sike_fprdc:
+.cfi_startproc	
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r15, -40
+	/* sike_fprdc: reduction of a 14-limb value to 7 limbs. In: rdi = input (overwritten as scratch), rsi = output. Uses mulx/adcx/adox (BMI2 + ADX); no CPU feature check is visible in this file. */
+	xorq	%rax,%rax
+	movq	0+0(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+	/* Round 1: input limbs 0 and 1 times .Lp434p1 -> r8..r13; the xorq above cleared CF and OF for the chains below */
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+	/* Second limb of the pair: adcx (CF) and adox (OF) run two independent carry chains */
+	xorq	%rax,%rax
+	movq	0+8(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r13,%rcx
+	adcxq	%r13,%r9
+	adcxq	%rcx,%r10
+
+	mulxq	8+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r11
+	adoxq	%rcx,%r10
+
+	mulxq	16+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r12
+	adoxq	%rcx,%r11
+
+	mulxq	24+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%rax,%r13
+	adoxq	%rcx,%r12
+	adoxq	%rax,%r13
+	/* Accumulate into the input starting at limb 3 (offset 24) and ripple the carry through limb 13 */
+	xorq	%rcx,%rcx
+	addq	24(%rdi),%r8
+	adcq	32(%rdi),%r9
+	adcq	40(%rdi),%r10
+	adcq	48(%rdi),%r11
+	adcq	56(%rdi),%r12
+	adcq	64(%rdi),%r13
+	adcq	72(%rdi),%rcx
+	movq	%r8,24(%rdi)
+	movq	%r9,32(%rdi)
+	movq	%r10,40(%rdi)
+	movq	%r11,48(%rdi)
+	movq	%r12,56(%rdi)
+	movq	%r13,64(%rdi)
+	movq	%rcx,72(%rdi)
+	movq	80(%rdi),%r8
+	movq	88(%rdi),%r9
+	movq	96(%rdi),%r10
+	movq	104(%rdi),%r11
+	adcq	$0x0,%r8
+	adcq	$0x0,%r9
+	adcq	$0x0,%r10
+	adcq	$0x0,%r11
+	movq	%r8,80(%rdi)
+	movq	%r9,88(%rdi)
+	movq	%r10,96(%rdi)
+	movq	%r11,104(%rdi)
+	/* Round 2: (updated) limbs 2 and 3 times .Lp434p1, accumulated from limb 5 (offset 40) upward */
+	xorq	%rax,%rax
+	movq	16+0(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	xorq	%rax,%rax
+	movq	16+8(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r13,%rcx
+	adcxq	%r13,%r9
+	adcxq	%rcx,%r10
+
+	mulxq	8+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r11
+	adoxq	%rcx,%r10
+
+	mulxq	16+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r12
+	adoxq	%rcx,%r11
+
+	mulxq	24+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%rax,%r13
+	adoxq	%rcx,%r12
+	adoxq	%rax,%r13
+
+	xorq	%rcx,%rcx
+	addq	40(%rdi),%r8
+	adcq	48(%rdi),%r9
+	adcq	56(%rdi),%r10
+	adcq	64(%rdi),%r11
+	adcq	72(%rdi),%r12
+	adcq	80(%rdi),%r13
+	adcq	88(%rdi),%rcx
+	movq	%r8,40(%rdi)
+	movq	%r9,48(%rdi)
+	movq	%r10,56(%rdi)
+	movq	%r11,64(%rdi)
+	movq	%r12,72(%rdi)
+	movq	%r13,80(%rdi)
+	movq	%rcx,88(%rdi)
+	movq	96(%rdi),%r8
+	movq	104(%rdi),%r9
+	adcq	$0x0,%r8
+	adcq	$0x0,%r9
+	movq	%r8,96(%rdi)
+	movq	%r9,104(%rdi)
+	/* Round 3: (updated) limbs 4 and 5, accumulated from limb 7 (offset 56); the two lowest sums become output limbs 0 and 1 */
+	xorq	%rax,%rax
+	movq	32+0(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	xorq	%rax,%rax
+	movq	32+8(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r13,%rcx
+	adcxq	%r13,%r9
+	adcxq	%rcx,%r10
+
+	mulxq	8+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r11
+	adoxq	%rcx,%r10
+
+	mulxq	16+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r12
+	adoxq	%rcx,%r11
+
+	mulxq	24+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%rax,%r13
+	adoxq	%rcx,%r12
+	adoxq	%rax,%r13
+
+	xorq	%rcx,%rcx
+	addq	56(%rdi),%r8
+	adcq	64(%rdi),%r9
+	adcq	72(%rdi),%r10
+	adcq	80(%rdi),%r11
+	adcq	88(%rdi),%r12
+	adcq	96(%rdi),%r13
+	adcq	104(%rdi),%rcx
+	movq	%r8,0(%rsi)
+	movq	%r9,8(%rsi)
+	movq	%r10,72(%rdi)
+	movq	%r11,80(%rdi)
+	movq	%r12,88(%rdi)
+	movq	%r13,96(%rdi)
+	movq	%rcx,104(%rdi)
+	/* Round 4: (updated) limb 6 alone, accumulated from limb 9 (offset 72); the five sums become output limbs 2..6 */
+	xorq	%rax,%rax
+	movq	48(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	addq	72(%rdi),%r8
+	adcq	80(%rdi),%r9
+	adcq	88(%rdi),%r10
+	adcq	96(%rdi),%r11
+	adcq	104(%rdi),%r12
+	movq	%r8,16(%rsi)
+	movq	%r9,24(%rsi)
+	movq	%r10,32(%rsi)
+	movq	%r11,40(%rsi)
+	movq	%r12,48(%rsi)
+	/* NOTE(review): r14 and r15 are saved and restored although this body never references them. */
+
+	popq	%r15
+.cfi_adjust_cfa_offset	-8
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.globl	sike_mpmul
+.hidden sike_mpmul
+.type	sike_mpmul,@function
+sike_mpmul:
+.cfi_startproc	
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r15, -40
+	/* sike_mpmul: 7x7-limb product via one Karatsuba split (low 4 limbs L / high 3 limbs H), using mulx/adcx/adox (BMI2 + ADX). */
+	/* In: rdi = a, rsi = b, rdx = c (14 limbs). c is moved to rcx because mulx takes its implicit source operand from rdx. */
+	movq	%rdx,%rcx
+	xorq	%rax,%rax
+
+	/* r8..r11 = aL + aH (computed after the pushes below); rax becomes all ones if that sum carries out of 4 limbs */
+	movq	0(%rdi),%r8
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+.cfi_offset	rbx, -48
+	pushq	%rbp
+.cfi_offset	rbp, -56
+.cfi_adjust_cfa_offset	8
+	subq	$96,%rsp
+.cfi_adjust_cfa_offset	96
+	/* 96 bytes of stack scratch: [0,32) = aL+aH, [32,64) = bL+bH, [64,96) = carry-correction term */
+	addq	32(%rdi),%r8
+	adcq	40(%rdi),%r9
+	adcq	48(%rdi),%r10
+	adcq	$0x0,%r11
+	sbbq	$0x0,%rax
+	movq	%r8,0(%rsp)
+	movq	%r9,8(%rsp)
+	movq	%r10,16(%rsp)
+	movq	%r11,24(%rsp)
+
+	/* r12..r15 = bL + bH; rbx becomes all ones if that sum carries out of 4 limbs */
+	xorq	%rbx,%rbx
+	movq	0(%rsi),%r12
+	movq	8(%rsi),%r13
+	movq	16(%rsi),%r14
+	movq	24(%rsi),%r15
+	addq	32(%rsi),%r12
+	adcq	40(%rsi),%r13
+	adcq	48(%rsi),%r14
+	adcq	$0x0,%r15
+	sbbq	$0x0,%rbx
+	movq	%r12,32(%rsp)
+	movq	%r13,40(%rsp)
+	movq	%r14,48(%rsp)
+	movq	%r15,56(%rsp)
+
+	/* Keep (bL+bH) only if the a-sum carried ... */
+	andq	%rax,%r12
+	andq	%rax,%r13
+	andq	%rax,%r14
+	andq	%rax,%r15
+
+	/* ... and (aL+aH) only if the b-sum carried */
+	andq	%rbx,%r8
+	andq	%rbx,%r9
+	andq	%rbx,%r10
+	andq	%rbx,%r11
+
+	/* Correction term = sum of the two masked values, parked at [64,96) */
+	addq	%r12,%r8
+	adcq	%r13,%r9
+	adcq	%r14,%r10
+	adcq	%r15,%r11
+	movq	%r8,64(%rsp)
+	movq	%r9,72(%rsp)
+	movq	%r10,80(%rsp)
+	movq	%r11,88(%rsp)
+
+	/* (aL+aH) x (bL+bH), 4x4 limbs: the 8-limb result overwrites [0,64) on the stack as it is produced */
+	movq	0+0(%rsp),%rdx
+	mulxq	32+0(%rsp),%r9,%r8
+	movq	%r9,0+0(%rsp)
+	mulxq	32+8(%rsp),%r10,%r9
+	xorq	%rax,%rax
+	adoxq	%r10,%r8
+	mulxq	32+16(%rsp),%r11,%r10
+	adoxq	%r11,%r9
+	mulxq	32+24(%rsp),%r12,%r11
+	adoxq	%r12,%r10
+
+	movq	0+8(%rsp),%rdx
+	mulxq	32+0(%rsp),%r12,%r13
+	adoxq	%rax,%r11
+	xorq	%rax,%rax
+	mulxq	32+8(%rsp),%r15,%r14
+	adoxq	%r8,%r12
+	movq	%r12,0+8(%rsp)
+	adcxq	%r15,%r13
+	mulxq	32+16(%rsp),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r9,%r13
+	mulxq	32+24(%rsp),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r10,%r14
+
+	movq	0+16(%rsp),%rdx
+	mulxq	32+0(%rsp),%r8,%r9
+	adoxq	%r11,%r15
+	adoxq	%rax,%rbx
+	xorq	%rax,%rax
+	mulxq	32+8(%rsp),%r11,%r10
+	adoxq	%r13,%r8
+	movq	%r8,0+16(%rsp)
+	adcxq	%r11,%r9
+	mulxq	32+16(%rsp),%r12,%r11
+	adcxq	%r12,%r10
+	adoxq	%r14,%r9
+	mulxq	32+24(%rsp),%rbp,%r12
+	adcxq	%rbp,%r11
+	adcxq	%rax,%r12
+
+	adoxq	%r15,%r10
+	adoxq	%rbx,%r11
+	adoxq	%rax,%r12
+
+	movq	0+24(%rsp),%rdx
+	mulxq	32+0(%rsp),%r8,%r13
+	xorq	%rax,%rax
+	mulxq	32+8(%rsp),%r15,%r14
+	adcxq	%r15,%r13
+	adoxq	%r8,%r9
+	mulxq	32+16(%rsp),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r13,%r10
+	mulxq	32+24(%rsp),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r14,%r11
+	adoxq	%r15,%r12
+	adoxq	%rax,%rbx
+	movq	%r9,0+24(%rsp)
+	movq	%r10,0+32(%rsp)
+	movq	%r11,0+40(%rsp)
+	movq	%r12,0+48(%rsp)
+	movq	%rbx,0+56(%rsp)
+
+
+	/* aL x bL (4x4 limbs) -> c[0..7] */
+	movq	0+0(%rdi),%rdx
+	mulxq	0+0(%rsi),%r9,%r8
+	movq	%r9,0+0(%rcx)
+	mulxq	0+8(%rsi),%r10,%r9
+	xorq	%rax,%rax
+	adoxq	%r10,%r8
+	mulxq	0+16(%rsi),%r11,%r10
+	adoxq	%r11,%r9
+	mulxq	0+24(%rsi),%r12,%r11
+	adoxq	%r12,%r10
+
+	movq	0+8(%rdi),%rdx
+	mulxq	0+0(%rsi),%r12,%r13
+	adoxq	%rax,%r11
+	xorq	%rax,%rax
+	mulxq	0+8(%rsi),%r15,%r14
+	adoxq	%r8,%r12
+	movq	%r12,0+8(%rcx)
+	adcxq	%r15,%r13
+	mulxq	0+16(%rsi),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r9,%r13
+	mulxq	0+24(%rsi),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r10,%r14
+
+	movq	0+16(%rdi),%rdx
+	mulxq	0+0(%rsi),%r8,%r9
+	adoxq	%r11,%r15
+	adoxq	%rax,%rbx
+	xorq	%rax,%rax
+	mulxq	0+8(%rsi),%r11,%r10
+	adoxq	%r13,%r8
+	movq	%r8,0+16(%rcx)
+	adcxq	%r11,%r9
+	mulxq	0+16(%rsi),%r12,%r11
+	adcxq	%r12,%r10
+	adoxq	%r14,%r9
+	mulxq	0+24(%rsi),%rbp,%r12
+	adcxq	%rbp,%r11
+	adcxq	%rax,%r12
+
+	adoxq	%r15,%r10
+	adoxq	%rbx,%r11
+	adoxq	%rax,%r12
+
+	movq	0+24(%rdi),%rdx
+	mulxq	0+0(%rsi),%r8,%r13
+	xorq	%rax,%rax
+	mulxq	0+8(%rsi),%r15,%r14
+	adcxq	%r15,%r13
+	adoxq	%r8,%r9
+	mulxq	0+16(%rsi),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r13,%r10
+	mulxq	0+24(%rsi),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r14,%r11
+	adoxq	%r15,%r12
+	adoxq	%rax,%rbx
+	movq	%r9,0+24(%rcx)
+	movq	%r10,0+32(%rcx)
+	movq	%r11,0+40(%rcx)
+	movq	%r12,0+48(%rcx)
+	movq	%rbx,0+56(%rcx)
+
+
+	/* aH x bH (3x3 limbs) -> c[8..13] */
+	movq	32+0(%rdi),%rdx
+	mulxq	32+0(%rsi),%r9,%r8
+	movq	%r9,64+0(%rcx)
+	mulxq	32+8(%rsi),%r10,%r9
+	xorq	%rax,%rax
+	adoxq	%r10,%r8
+	mulxq	32+16(%rsi),%r11,%r10
+	adoxq	%r11,%r9
+
+	movq	32+8(%rdi),%rdx
+	mulxq	32+0(%rsi),%r12,%r11
+	adoxq	%rax,%r10
+	xorq	%rax,%rax
+
+	mulxq	32+8(%rsi),%r14,%r13
+	adoxq	%r8,%r12
+	movq	%r12,64+8(%rcx)
+	adcxq	%r14,%r11
+
+	mulxq	32+16(%rsi),%r8,%r14
+	adoxq	%r9,%r11
+	adcxq	%r8,%r13
+	adcxq	%rax,%r14
+	adoxq	%r10,%r13
+
+	movq	32+16(%rdi),%rdx
+	mulxq	32+0(%rsi),%r8,%r9
+	adoxq	%rax,%r14
+	xorq	%rax,%rax
+
+	mulxq	32+8(%rsi),%r10,%r12
+	adoxq	%r11,%r8
+	movq	%r8,64+16(%rcx)
+	adcxq	%r13,%r9
+
+	mulxq	32+16(%rsi),%r11,%r8
+	adcxq	%r14,%r12
+	adcxq	%rax,%r8
+	adoxq	%r10,%r9
+	adoxq	%r12,%r11
+	adoxq	%rax,%r8
+	movq	%r9,64+24(%rcx)
+	movq	%r11,64+32(%rcx)
+	movq	%r8,64+40(%rcx)
+
+
+
+	/* r8..r11 = correction term + upper four limbs of the stack product */
+	movq	64(%rsp),%r8
+	movq	72(%rsp),%r9
+	movq	80(%rsp),%r10
+	movq	88(%rsp),%r11
+
+	movq	32(%rsp),%rax
+	addq	%rax,%r8
+	movq	40(%rsp),%rax
+	adcq	%rax,%r9
+	movq	48(%rsp),%rax
+	adcq	%rax,%r10
+	movq	56(%rsp),%rax
+	adcq	%rax,%r11
+
+	/* r12..r15 = lower four limbs; subtract aL*bL (c[0..7]) from the 8-limb middle term */
+	movq	0(%rsp),%r12
+	movq	8(%rsp),%r13
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r15
+	subq	0(%rcx),%r12
+	sbbq	8(%rcx),%r13
+	sbbq	16(%rcx),%r14
+	sbbq	24(%rcx),%r15
+	sbbq	32(%rcx),%r8
+	sbbq	40(%rcx),%r9
+	sbbq	48(%rcx),%r10
+	sbbq	56(%rcx),%r11
+
+	/* Subtract aH*bH (c[8..13]) as well */
+	subq	64(%rcx),%r12
+	sbbq	72(%rcx),%r13
+	sbbq	80(%rcx),%r14
+	sbbq	88(%rcx),%r15
+	sbbq	96(%rcx),%r8
+	sbbq	104(%rcx),%r9
+	sbbq	$0x0,%r10
+	sbbq	$0x0,%r11
+	/* Add the middle term into c starting at limb 4 (offset 32), carrying on into limbs 12 and 13 */
+	addq	32(%rcx),%r12
+	movq	%r12,32(%rcx)
+	adcq	40(%rcx),%r13
+	movq	%r13,40(%rcx)
+	adcq	48(%rcx),%r14
+	movq	%r14,48(%rcx)
+	adcq	56(%rcx),%r15
+	movq	%r15,56(%rcx)
+	adcq	64(%rcx),%r8
+	movq	%r8,64(%rcx)
+	adcq	72(%rcx),%r9
+	movq	%r9,72(%rcx)
+	adcq	80(%rcx),%r10
+	movq	%r10,80(%rcx)
+	adcq	88(%rcx),%r11
+	movq	%r11,88(%rcx)
+	movq	96(%rcx),%r12
+	adcq	$0x0,%r12
+	movq	%r12,96(%rcx)
+	movq	104(%rcx),%r13
+	adcq	$0x0,%r13
+	movq	%r13,104(%rcx)
+	/* Release the scratch area and restore callee-saved registers in reverse push order */
+	addq	$96,%rsp
+.cfi_adjust_cfa_offset	-96
+	popq	%rbp
+.cfi_adjust_cfa_offset	-8
+.cfi_same_value	rbp
+	popq	%rbx
+.cfi_adjust_cfa_offset	-8
+.cfi_same_value	rbx
+
+
+	popq	%r15
+.cfi_adjust_cfa_offset	-8
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3
+.cfi_endproc	
+#endif
diff --git a/src/kem/sike/sike-p434-sha256/asm/fp_generic.c b/src/kem/sike/sike-p434-sha256/asm/fp_generic.c
new file mode 100644
index 00000000..38e7645e
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/asm/fp_generic.c
@@ -0,0 +1,179 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: portable modular arithmetic for p434
+*********************************************************************************************/
+
+#if defined(ARCH_GENERIC) || \
+    (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+
+#include "../utils.h"
+#include "../fpx.h"
+
+// Global constants
+extern const struct params_t params;
+
+static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result: c[0] receives the low word, c[1] the high word.
+    crypto_word_t al, ah, bl, bh, temp;
+    crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);
+    crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4);
+    // Split both operands into halves of sizeof(crypto_word_t)*4 bits each.
+    al = a & mask_low;                              // Low part
+    ah = a >> (sizeof(crypto_word_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(crypto_word_t) * 4);
+    // Four half-by-half partial products; each one fits in a single crypto_word_t.
+    albl = al*bl;
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                         // C00
+    // Second quarter: three half-word terms summed; whatever exceeds half a word carries upward.
+    res1 = albl >> (sizeof(crypto_word_t) * 4);
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(crypto_word_t) * 4);
+    c[0] ^= temp << (sizeof(crypto_word_t) * 4);    // C01
+    // Third quarter: upper halves of the cross products, low half of ah*bh, plus the carry.
+    res1 = ahbl >> (sizeof(crypto_word_t) * 4);
+    res2 = albh >> (sizeof(crypto_word_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                         // C10
+    carry = temp & mask_high;                       // overflow kept in place (already in the upper half)
+    c[1] ^= (ahbh & mask_high) + carry;             // C11
+}
+
+void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
+{ // Modular addition, c = a+b mod p434.
+  // Inputs: a, b in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    unsigned int i, carry = 0;
+    crypto_word_t mask;
+    // Step 1: c = a + b. ADDC is defined outside this file; presumably (carry-in, x, y, carry-out, sum) -- confirm in utils.h.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]);
+    }
+    // Step 2: c -= params.prime_x2; a final borrow means the sum was below that constant.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(carry, c[i], params.prime_x2[i], carry, c[i]);
+    }
+    mask = 0 - (crypto_word_t)carry;                // all ones if the subtraction borrowed, else zero
+    // Step 3: add the constant back only where mask is set, with no data-dependent branch.
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]);
+    }
+}
+
+void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
+{ // Modular subtraction, c = a-b mod p434.
+  // Inputs: a, b in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    unsigned int i, borrow = 0;
+    crypto_word_t mask;
+    // Step 1: c = a - b; a final borrow means a < b.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);
+    }
+    mask = 0 - (crypto_word_t)borrow;               // all ones if the subtraction borrowed, else zero
+    // Step 2: add params.prime_x2 back only where mask is set ("borrow" is reused as the carry here).
+    borrow = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]);
+    }
+}
+
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
+    unsigned int i, j;
+    crypto_word_t t = 0, u = 0, v = 0, UV[2];       // (t,u,v): three-word column accumulator, v lowest
+    unsigned int carry = 0;
+    // Columns 0 .. NWORDS_FIELD-1: column i sums every a[j]*b[i-j] with 0 <= j <= i.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);
+            ADDC(0, UV[0], v, carry, v);
+            ADDC(carry, UV[1], u, carry, u);
+            t += carry;
+        }
+        c[i] = v;                                   // emit the finished column, then shift the accumulator down a word
+        v = u;
+        u = t;
+        t = 0;
+    }
+    // Columns NWORDS_FIELD .. 2*NWORDS_FIELD-2: j starts higher so that i-j stays inside b.
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);
+            ADDC(0, UV[0], v, carry, v);
+            ADDC(carry, UV[1], u, carry, u);
+            t += carry;
+        }
+        c[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    c[2*NWORDS_FIELD-1] = v;                        // top word is whatever remains in the accumulator
+}
+
+void sike_fprdc(const felm_t ma, felm_t mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
+  // mc = ma*R^-1 mod p434x2, where R = 2^448.
+  // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
+  // ma is assumed to be in Montgomery representation.
+    unsigned int i, j, carry, count = ZERO_WORDS;   // NOTE: ma is indexed up to 2*NWORDS_FIELD-1 below, so it must hold a double-width value
+    crypto_word_t UV[2], t = 0, u = 0, v = 0;       // (t,u,v): three-word column accumulator, v lowest
+    // mc is cleared first because the first pass reads mc[j] while it is still being filled in.
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+    // First pass: columns 0 .. NWORDS_FIELD-1; each column result is stored back into mc[i].
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        for (j = 0; j < i; j++) {
+            if (j < (i-ZERO_WORDS+1)) {             // skips products against the lowest ZERO_WORDS-1 entries of prime_p1 (presumably zero -- confirm)
+                MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v);
+                ADDC(carry, UV[1], u, carry, u);
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);
+        ADDC(carry, u, 0, carry, u);
+        t += carry;
+        mc[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    // Second pass: columns NWORDS_FIELD .. 2*NWORDS_FIELD-2; results overwrite mc from index 0 upward.
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
+        if (count > 0) {
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) {
+                MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v);
+                ADDC(carry, UV[1], u, carry, u);
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);
+        ADDC(carry, u, 0, carry, u);
+        t += carry;
+        mc[i-NWORDS_FIELD] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);     // top word: remaining accumulator plus the last input word
+    mc[NWORDS_FIELD-1] = v;
+}
+
+#endif  // ARCH_GENERIC || (!ARCH_X86_64 && !ARCH_AARCH64)
diff --git a/src/kem/sike/sike-p434-sha256/fpx.c b/src/kem/sike/sike-p434-sha256/fpx.c
new file mode 100644
index 00000000..30233406
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/fpx.c
@@ -0,0 +1,282 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: core functions over GF(p) and GF(p^2)
+*********************************************************************************************/
+#include <stddef.h>
+#include "utils.h"
+#include "fpx.h"
+
+extern const struct params_t params;
+
+// Montgomery squaring: double-length square of ma (sike_mpmul) followed by Montgomery reduction (sike_fprdc) into mc.
+static void fpsqr_mont(const felm_t ma, felm_t mc)
+{
+    dfelm_t temp = {0};          // double-length buffer for the unreduced square
+    sike_mpmul(ma, ma, temp);
+    sike_fprdc(temp, mc);
+}
+
+// Fixed chain of squarings and multiplications computing a^((p-3)/4) in place, using Montgomery arithmetic.
+static void fpinv_chain_mont(felm_t a)
+{
+    unsigned int i, j;
+    felm_t t[31], tt;
+
+    // Precomputed table: tt = a^2, then t[i] = a^(2*i+3) for i = 0..30
+    fpsqr_mont(a, tt);
+    sike_fpmul_mont(a, tt, t[0]);
+    for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]);
+
+    sike_fpcopy(a, tt);          // tt is reused as the running accumulator from here on
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[21], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[19], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(a, tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[22], tt, tt);
+    for (j = 0; j < 35; j++) {   // tail: 35 rounds of (6 squarings, multiply by t[30])
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        sike_fpmul_mont(t[30], tt, tt);
+    }
+    sike_fpcopy(tt, a);          // write the result back over the input
+}
+
+// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. The input is overwritten with the result.
+static void fpinv_mont(felm_t a)
+{
+    felm_t tt = {0};
+    sike_fpcopy(a, tt);
+    fpinv_chain_mont(tt);        // tt = a^((p-3)/4)
+    fpsqr_mont(tt, tt);
+    fpsqr_mont(tt, tt);          // two squarings: tt = a^(p-3)
+    sike_fpmul_mont(a, tt, a);   // a = a * a^(p-3) = a^(p-2)
+}
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. (Portable build only, see #if.)
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
+    uint8_t carry = 0;                            // carry chained from word i into word i+1
+    for (size_t i = 0; i < nwords; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]);
+    }
+    return carry;                                 // carry out of the most significant word
+}
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. (Portable build only.)
+inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
+    uint32_t borrow = 0;                          // borrow chained from word i into word i+1
+    for (size_t i = 0; i < nwords; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);
+    }
+    return borrow;                                // borrow out of the most significant word
+}
+#endif
+
+// Multiprecision addition of two NWORDS_FIELD-word values, c = a+b. No carry is returned to the caller.
+inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
+{
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+    mp_add(a, b, c, NWORDS_FIELD);                // portable path; the returned carry is discarded
+#else
+    sike_mpadd_asm(a, b, c);                      // assembly path for x86-64 / AArch64 builds
+#endif
+}
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
+// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
+inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));   // borrow bit 1 -> all-ones mask, 0 -> zero
+#else
+    return sike_mpsubx2_asm(a, b, c);             // presumably returns the same mask - confirm against the assembly
+#endif
+}
+
+// Multiprecision double subtraction, c = c-a-b, where lng(a) = lng(b) = lng(c) = 2*NWORDS_FIELD.
+// Inputs should be s.t. c > a and c > b; borrows are not reported to the caller.
+inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+    mp_sub(c, a, c, 2*NWORDS_FIELD);              // c = c - a (in place)
+    mp_sub(c, b, c, 2*NWORDS_FIELD);              // c = c - b (in place)
+#else
+    sike_mpdblsubx2_asm(a, b, c);                 // assembly path for x86-64 / AArch64 builds
+#endif
+}
+
+// Word-by-word copy of the NWORDS_FIELD-word field element a into c (lowest index first).
+void sike_fpcopy(const felm_t a, felm_t c) {
+    size_t word = 0;
+    while (word < NWORDS_FIELD) {
+        c[word] = a[word]; word++;
+    }
+}
+
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod prime, where R=2^448 (see sike_fprdc)
+void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
+{
+    dfelm_t temp = {0};          // double-length buffer for the unreduced product
+    sike_mpmul(ma, mb, temp);
+    sike_fprdc(temp, mc);
+}
+
+// Conversion from Montgomery representation to standard representation,
+// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+void sike_from_mont(const felm_t ma, felm_t c)
+{
+    felm_t one = {0};
+    one[0] = 1;                  // the plain integer 1 (not in Montgomery form)
+
+    sike_fpmul_mont(ma, one, c); // Montgomery-multiplying by 1 divides out one factor of R
+    sike_fpcorrection(c);        // final reduction of c into [0, p-1]
+}
+
+// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2), using two field multiplications.
+// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
+// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
+void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
+    felm_t t1, t2, t3;
+
+    mp_addfast(a->c0, a->c1, t1);                      // t1 = a0+a1 (plain addition, no modular reduction)
+    sike_fpsub(a->c0, a->c1, t2);                      // t2 = a0-a1
+    mp_addfast(a->c0, a->c0, t3);                      // t3 = 2a0 (plain addition, no modular reduction)
+    sike_fpmul_mont(t1, t2, c->c0);                    // c0 = (a0+a1)(a0-a1)
+    sike_fpmul_mont(t3, a->c1, c->c1);                 // c1 = 2a0*a1
+}
+
+// Modular negation, a = -a mod p434, computed in place as params.prime_x2 - a (prime_x2 is presumably 2*p434 - confirm).
+// Input/output: a in [0, 2*p434-1]
+void sike_fpneg(felm_t a) {
+  uint32_t borrow = 0;   // chained between words; the final borrow is not used
+  for (size_t i = 0; i < NWORDS_FIELD; i++) {
+    SUBC(borrow, params.prime_x2[i], a[i], borrow, a[i]);
+  }
+}
+
+// Modular division by two, c = a/2 mod p434.
+// Input : a in [0, 2*p434-1]
+// Output: c in [0, 2*p434-1]
+void sike_fpdiv2(const felm_t a, felm_t c) {
+  uint32_t carry = 0;
+  crypto_word_t mask;
+
+  mask = 0 - (crypto_word_t)(a[0] & 1);    // If a is odd compute a+p434 (mask is all-ones for odd a, zero otherwise)
+  for (size_t i = 0; i < NWORDS_FIELD; i++) {
+    ADDC(carry, a[i], params.prime[i] & mask, carry, c[i]);
+  }
+
+  // Multiprecision right shift by one: each word takes the low bit of the next word as its new top bit.
+  for (size_t i = 0; i < NWORDS_FIELD-1; i++) {
+    c[i] = (c[i] >> 1) ^ (c[i+1] << (RADIX - 1));
+  }
+  c[NWORDS_FIELD-1] >>= 1;
+}
+
+// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1], without branching on the value of a.
+void sike_fpcorrection(felm_t a) {
+  uint32_t borrow = 0;
+  crypto_word_t mask;
+
+  for (size_t i = 0; i < NWORDS_FIELD; i++) {   // a = a - p
+    SUBC(borrow, a[i], params.prime[i], borrow, a[i]);
+  }
+  mask = 0 - (crypto_word_t)borrow;             // all-ones if the subtraction went negative, zero otherwise
+
+  borrow = 0;                                   // reused as the carry of the add-back
+  for (size_t i = 0; i < NWORDS_FIELD; i++) {   // a = a + (p & mask): restores a when it was already below p
+    ADDC(borrow, a[i], params.prime[i] & mask, borrow, a[i]);
+  }
+}
+
+// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2), using three multiprecision multiplications.
+// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
+// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
+void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
+    felm_t t1, t2;
+    dfelm_t tt1, tt2, tt3;
+    crypto_word_t mask;
+
+    mp_addfast(a->c0, a->c1, t1);                      // t1 = a0+a1
+    mp_addfast(b->c0, b->c1, t2);                      // t2 = b0+b1
+    sike_mpmul(a->c0, b->c0, tt1);                     // tt1 = a0*b0
+    sike_mpmul(a->c1, b->c1, tt2);                     // tt2 = a1*b1
+    sike_mpmul(t1, t2, tt3);                           // tt3 = (a0+a1)*(b0+b1)
+    mp_dblsubfast(tt1, tt2, tt3);                      // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+    mask = mp_subfast(tt1, tt2, tt1);                  // tt1 = a0*b0 - a1*b1. If tt1 < 0 then mask = 0xFF..F, else if tt1 >= 0 then mask = 0x00..0
+
+    for (size_t i = 0; i < NWORDS_FIELD; i++) {        // t1 = p if tt1 went negative, else 0 (t1 is reused as scratch)
+        t1[i] = params.prime[i] & mask;
+    }
+
+    sike_fprdc(tt3, c->c1);                             // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+    mp_addfast(&tt1[NWORDS_FIELD], t1, &tt1[NWORDS_FIELD]);   // add t1 into the upper half of tt1 before reducing
+    sike_fprdc(tt1, c->c0);                             // c[0] = a0*b0 - a1*b1
+}
+
+// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). The input is overwritten with the result.
+void sike_fp2inv_mont(f2elm_t a) {
+    f2elm_t t1;
+
+    fpsqr_mont(a->c0, t1->c0);                         // t10 = a0^2
+    fpsqr_mont(a->c1, t1->c1);                         // t11 = a1^2
+    sike_fpadd(t1->c0, t1->c1, t1->c0);                // t10 = a0^2+a1^2
+    fpinv_mont(t1->c0);                                // t10 = (a0^2+a1^2)^-1
+    sike_fpneg(a->c1);                                 // a = a0-i*a1
+    sike_fpmul_mont(a->c0, t1->c0, a->c0);             // a0 = a0*(a0^2+a1^2)^-1
+    sike_fpmul_mont(a->c1, t1->c0, a->c1);             // a = (a0-i*a1)*(a0^2+a1^2)^-1
+}
diff --git a/src/kem/sike/sike-p434-sha256/fpx.h b/src/kem/sike/sike-p434-sha256/fpx.h
new file mode 100644
index 00000000..b9255ac7
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/fpx.h
@@ -0,0 +1,112 @@
+#ifndef FPX_H_
+#define FPX_H_
+
+#include "utils.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// Modular addition, c = a+b mod p.
+void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
+// Modular subtraction, c = a-b mod p.
+void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
+// Modular division by two, c = a/2 mod p.
+void sike_fpdiv2(const felm_t a, felm_t c);
+// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
+void sike_fpcorrection(felm_t a);
+// Multiprecision multiply, c = a*b, where a and b are field elements and c is double-length.
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
+// Montgomery reduction for p434 of a double-length input, c = a*R^-1 with R = 2^448 (see sike_fprdc)
+void sike_fprdc(const dfelm_t a, felm_t c);
+// Double-length multiprecision subtraction, c = c-a-b (fpx.c passes dfelm_t operands despite the felm_t parameter types)
+void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c);
+// Double-length multiprecision subtraction, c = a-b; fpx.c uses the return value as the "c < 0" mask
+crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
+// Multiprecision addition of two field elements, c = a+b
+void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
+// Modular negation, a = -a mod p.
+void sike_fpneg(felm_t a);
+// Copy of a field element, c = a
+void sike_fpcopy(const felm_t a, felm_t c);
+// Presumably sets a field element to zero, a = 0 - TODO confirm against the definition.
+void sike_fpzero(felm_t a);
+// If option = 0xFF...FF x and y are swapped, otherwise swap doesn't happen. Constant time.
+void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
+// Conversion from Montgomery representation to standard representation,
+// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+void sike_from_mont(const felm_t ma, felm_t c);
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448
+void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
+// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2)
+void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void sike_fp2inv_mont(f2elm_t a);
+// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
+void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
+// Modular correction, a = a in GF(p^2). A function-like macro of the same name is also defined later in this header.
+void sike_fp2correction(f2elm_t a);
+
+#if defined(__cplusplus)
+}  // extern C
+#endif
+
+// GF(p^2) addition, c = a+b in GF(p^2). Each argument is expanded twice (once per coordinate).
+#define sike_fp2add(a, b, c)                   \
+do {                                           \
+    sike_fpadd((a)->c0, (b)->c0, (c)->c0);     \
+    sike_fpadd((a)->c1, (b)->c1, (c)->c1);     \
+} while(0)
+
+// GF(p^2) subtraction, c = a-b in GF(p^2). Each argument is expanded twice (once per coordinate).
+#define sike_fp2sub(a,b,c)                     \
+do {                                           \
+    sike_fpsub((a)->c0, (b)->c0, (c)->c0);     \
+    sike_fpsub((a)->c1, (b)->c1, (c)->c1);     \
+} while(0)
+
+// Copy a GF(p^2) element, c = a. Each argument is expanded twice (once per coordinate).
+#define sike_fp2copy(a, c)                     \
+do {                                           \
+    sike_fpcopy((a)->c0, (c)->c0);             \
+    sike_fpcopy((a)->c1, (c)->c1);             \
+} while(0)
+
+// GF(p^2) negation, a = -a in GF(p^2). The argument is expanded twice (once per coordinate).
+#define sike_fp2neg(a)                         \
+do {                                           \
+    sike_fpneg((a)->c0);                       \
+    sike_fpneg((a)->c1);                       \
+} while(0)
+
+// GF(p^2) division by two, c = a/2  in GF(p^2). Each argument is expanded twice (once per coordinate).
+#define sike_fp2div2(a, c)                     \
+do {                                           \
+    sike_fpdiv2((a)->c0, (c)->c0);             \
+    sike_fpdiv2((a)->c1, (c)->c1);             \
+} while(0)
+
+// Modular correction, a = a in GF(p^2): applies sike_fpcorrection to both coordinates (argument expanded twice).
+#define sike_fp2correction(a)                  \
+do {                                           \
+    sike_fpcorrection((a)->c0);                \
+    sike_fpcorrection((a)->c1);                \
+} while(0)
+
+// Conversion of a GF(p^2) element to Montgomery representation,
+// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). Uses `params`, which must be visible at the point of expansion.
+#define sike_to_fp2mont(a, mc)                 \
+do {                                           \
+    sike_fpmul_mont((a)->c0, params.mont_R2, (mc)->c0);   \
+    sike_fpmul_mont((a)->c1, params.mont_R2, (mc)->c1);   \
+} while(0)
+
+// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
+// c_i = ma_i*R^(-1) = a_i in GF(p^2). Each argument is expanded twice (once per coordinate).
+#define sike_from_fp2mont(ma, c)               \
+do {                                           \
+    sike_from_mont((ma)->c0, (c)->c0);         \
+    sike_from_mont((ma)->c1, (c)->c1);         \
+} while(0)
+
+#endif // FPX_H_
diff --git a/src/kem/sike/sike-p434-sha256/isogeny.c b/src/kem/sike/sike-p434-sha256/isogeny.c
new file mode 100644
index 00000000..661410e4
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/isogeny.c
@@ -0,0 +1,262 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: elliptic curve and isogeny functions
+*********************************************************************************************/
+#include <stddef.h>
+#include <string.h>
+#include "utils.h"
+#include "isogeny.h"
+#include "fpx.h"
+
+static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
+{ // Doubling of a Montgomery point in projective coordinates (X:Z).
+  // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A24plus = A+2C and C24 = 4C.
+  // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). P is only read before Q is first written (xDBLe calls this with Q == P).
+    f2elm_t t0, t1;
+
+    sike_fp2sub(P->X, P->Z, t0);                         // t0 = X1-Z1
+    sike_fp2add(P->X, P->Z, t1);                         // t1 = X1+Z1
+    sike_fp2sqr_mont(t0, t0);                            // t0 = (X1-Z1)^2
+    sike_fp2sqr_mont(t1, t1);                            // t1 = (X1+Z1)^2
+    sike_fp2mul_mont(C24, t0, Q->Z);                     // Z2 = C24*(X1-Z1)^2
+    sike_fp2mul_mont(t1, Q->Z, Q->X);                    // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
+    sike_fp2sub(t1, t0, t1);                             // t1 = (X1+Z1)^2-(X1-Z1)^2
+    sike_fp2mul_mont(A24plus, t1, t0);                   // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
+    sike_fp2add(Q->Z, t0, Q->Z);                         // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
+    sike_fp2mul_mont(Q->Z, t1, Q->Z);                    // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
+}
+
+void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
+{ // Repeated doubling: Q <- [2^e]P on a Montgomery curve with projective constants, done as e calls to xDBL.
+  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP, and Montgomery curve constants A+2C and 4C.
+  // Output: projective Montgomery x-coordinates Q <- (2^e)*P. For e = 0, Q is simply a copy of P.
+
+    memmove(Q, P, sizeof(*P));                           // seed Q with P; memmove tolerates Q == P
+    for (size_t remaining = e; remaining != 0; remaining--) {
+        xDBL(Q, Q, A24plus, C24);                        // double in place
+    }
+}
+
+void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
+{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+  // Input:  projective point of order four P = (X4:Z4).
+  // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C (written to A24plus and C24) and the
+  //         3 coefficients coeff[0..2] that are used to evaluate the isogeny at a point in eval_4_isog().
+
+    sike_fp2sub(P->X, P->Z, coeff[1]);                   // coeff[1] = X4-Z4
+    sike_fp2add(P->X, P->Z, coeff[2]);                   // coeff[2] = X4+Z4
+    sike_fp2sqr_mont(P->Z, coeff[0]);                    // coeff[0] = Z4^2
+    sike_fp2add(coeff[0], coeff[0], coeff[0]);           // coeff[0] = 2*Z4^2
+    sike_fp2sqr_mont(coeff[0], C24);                     // C24 = 4*Z4^4
+    sike_fp2add(coeff[0], coeff[0], coeff[0]);           // coeff[0] = 4*Z4^2
+    sike_fp2sqr_mont(P->X, A24plus);                     // A24plus = X4^2
+    sike_fp2add(A24plus, A24plus, A24plus);              // A24plus = 2*X4^2
+    sike_fp2sqr_mont(A24plus, A24plus);                  // A24plus = 4*X4^4
+}
+
+void eval_4_isog(point_proj_t P, f2elm_t* coeff)
+{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
+  // by the 3 coefficients in coeff (computed in the function get_4_isog()).
+  // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
+  // Output: the projective point P = phi(P) = (X:Z) in the codomain (P is updated in place).
+    f2elm_t t0, t1;
+
+    sike_fp2add(P->X, P->Z, t0);                         // t0 = X+Z
+    sike_fp2sub(P->X, P->Z, t1);                         // t1 = X-Z
+    sike_fp2mul_mont(t0, coeff[1], P->X);                // X = (X+Z)*coeff[1]
+    sike_fp2mul_mont(t1, coeff[2], P->Z);                // Z = (X-Z)*coeff[2]
+    sike_fp2mul_mont(t0, t1, t0);                        // t0 = (X+Z)*(X-Z)
+    sike_fp2mul_mont(t0, coeff[0], t0);                  // t0 = coeff[0]*(X+Z)*(X-Z)
+    sike_fp2add(P->X, P->Z, t1);                         // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
+    sike_fp2sub(P->X, P->Z, P->Z);                       // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2]  (squared below, so the sign does not matter)
+    sike_fp2sqr_mont(t1, t1);                            // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+    sike_fp2sqr_mont(P->Z, P->Z);                        // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2
+    sike_fp2add(t1, t0, P->X);                           // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+    sike_fp2sub(P->Z, t0, t0);                           // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z)
+    sike_fp2mul_mont(P->X, t1, P->X);                    // Xfinal
+    sike_fp2mul_mont(P->Z, t0, P->Z);                    // Zfinal
+}
+
+
+void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
+{ // Tripling of a Montgomery point in projective coordinates (X:Z).
+  // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+  // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). P is fully read before Q is written (xTPLe calls this with Q == P).
+    f2elm_t t0, t1, t2, t3, t4, t5, t6;
+
+    sike_fp2sub(P->X, P->Z, t0);                         // t0 = X-Z
+    sike_fp2sqr_mont(t0, t2);                            // t2 = (X-Z)^2
+    sike_fp2add(P->X, P->Z, t1);                         // t1 = X+Z
+    sike_fp2sqr_mont(t1, t3);                            // t3 = (X+Z)^2
+    sike_fp2add(t0, t1, t4);                             // t4 = 2*X
+    sike_fp2sub(t1, t0, t0);                             // t0 = 2*Z
+    sike_fp2sqr_mont(t4, t1);                            // t1 = 4*X^2
+    sike_fp2sub(t1, t3, t1);                             // t1 = 4*X^2 - (X+Z)^2
+    sike_fp2sub(t1, t2, t1);                             // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+    sike_fp2mul_mont(t3, A24plus, t5);                   // t5 = A24plus*(X+Z)^2
+    sike_fp2mul_mont(t3, t5, t3);                        // t3 = A24plus*(X+Z)^4  (t3 = (X+Z)^2 times t5)
+    sike_fp2mul_mont(A24minus, t2, t6);                  // t6 = A24minus*(X-Z)^2
+    sike_fp2mul_mont(t2, t6, t2);                        // t2 = A24minus*(X-Z)^4  (t2 = (X-Z)^2 times t6)
+    sike_fp2sub(t2, t3, t3);                             // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
+    sike_fp2sub(t5, t6, t2);                             // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
+    sike_fp2mul_mont(t1, t2, t1);                        // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
+    sike_fp2add(t3, t1, t2);                             // t2 = t1 + t3 (the product above plus the difference held in t3)
+    sike_fp2sqr_mont(t2, t2);                            // t2 = t2^2
+    sike_fp2mul_mont(t4, t2, Q->X);                      // X3 = 2*X*t2
+    sike_fp2sub(t3, t1, t1);                             // t1 = t3 - t1 (the difference held in t3 minus the product above)
+    sike_fp2sqr_mont(t1, t1);                            // t1 = t1^2
+    sike_fp2mul_mont(t0, t1, Q->Z);                      // Z3 = 2*Z*t1
+}
+
+void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
+{ // Repeated tripling: Q <- [3^e]P on a Montgomery curve with projective constants, done as e calls to xTPL.
+  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP, and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+  // Output: projective Montgomery x-coordinates Q <- (3^e)*P. For e = 0, Q is simply a copy of P.
+    memmove(Q, P, sizeof(*P));                           // seed Q with P; memmove tolerates Q == P
+    for (size_t remaining = e; remaining != 0; remaining--) {
+        xTPL(Q, Q, A24minus, A24plus);                   // triple in place
+    }
+}
+
+void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
+{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+  // Input:  projective point of order three P = (X3:Z3).
+  // Output: the 3-isogenous Montgomery curve, written to A24minus and A24plus, and the 2 coefficients coeff[0], coeff[1].
+    f2elm_t t0, t1, t2, t3, t4;
+
+    sike_fp2sub(P->X, P->Z, coeff[0]);                   // coeff0 = X-Z
+    sike_fp2sqr_mont(coeff[0], t0);                      // t0 = (X-Z)^2
+    sike_fp2add(P->X, P->Z, coeff[1]);                   // coeff1 = X+Z
+    sike_fp2sqr_mont(coeff[1], t1);                      // t1 = (X+Z)^2
+    sike_fp2add(t0, t1, t2);                             // t2 = (X+Z)^2 + (X-Z)^2
+    sike_fp2add(coeff[0], coeff[1], t3);                 // t3 = 2*X
+    sike_fp2sqr_mont(t3, t3);                            // t3 = 4*X^2
+    sike_fp2sub(t3, t2, t3);                             // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+    sike_fp2add(t1, t3, t2);                             // t2 = 4*X^2 - (X-Z)^2
+    sike_fp2add(t3, t0, t3);                             // t3 = 4*X^2 - (X+Z)^2
+    sike_fp2add(t0, t3, t4);                             // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
+    sike_fp2add(t4, t4, t4);                             // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
+    sike_fp2add(t1, t4, t4);                             // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
+    sike_fp2mul_mont(t2, t4, A24minus);                  // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+    sike_fp2add(t1, t2, t4);                             // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
+    sike_fp2add(t4, t4, t4);                             // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
+    sike_fp2add(t0, t4, t4);                             // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
+    sike_fp2mul_mont(t3, t4, t4);                        // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+    sike_fp2sub(t4, A24minus, t0);                       // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+    sike_fp2add(A24minus, t0, A24plus);                  // A24plus = A24minus + t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+}
+
+
+void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
+{ // Evaluates a 3-isogeny phi at the projective point Q = (X:Z), in place, using the
+  // 2 coefficients in coeff (computed in the function get_3_isog()).
+  // Inputs: the projective point Q = (X:Z) and the coefficients coeff[0], coeff[1].
+  // Output: the projective point Q <- phi(Q) = (X3:Z3).
+    f2elm_t t0, t1, t2;
+
+    sike_fp2add(Q->X, Q->Z, t0);                       // t0 = X+Z
+    sike_fp2sub(Q->X, Q->Z, t1);                       // t1 = X-Z
+    sike_fp2mul_mont(t0, coeff[0], t0);                // t0 = coeff0*(X+Z)
+    sike_fp2mul_mont(t1, coeff[1], t1);                // t1 = coeff1*(X-Z)
+    sike_fp2add(t0, t1, t2);                           // t2 = coeff0*(X+Z) + coeff1*(X-Z)
+    sike_fp2sub(t1, t0, t0);                           // t0 = coeff1*(X-Z) - coeff0*(X+Z)
+    sike_fp2sqr_mont(t2, t2);                          // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
+    sike_fp2sqr_mont(t0, t0);                          // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
+    sike_fp2mul_mont(Q->X, t2, Q->X);                  // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
+    sike_fp2mul_mont(Q->Z, t0, Q->Z);                  // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
+}
+
+
+void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
+{ // 3-way simultaneous inversion using a single call to sike_fp2inv_mont
+  // Input:  z1,z2,z3
+  // Output: 1/z1,1/z2,1/z3 (the inputs are overwritten).
+    f2elm_t t0, t1, t2, t3;
+
+    sike_fp2mul_mont(z1, z2, t0);                      // t0 = z1*z2
+    sike_fp2mul_mont(z3, t0, t1);                      // t1 = z1*z2*z3
+    sike_fp2inv_mont(t1);                              // t1 = 1/(z1*z2*z3)
+    sike_fp2mul_mont(z3, t1, t2);                      // t2 = 1/(z1*z2)
+    sike_fp2mul_mont(t2, z2, t3);                      // t3 = 1/z1 (kept in a temporary: z1 is still needed on the next line)
+    sike_fp2mul_mont(t2, z1, z2);                      // z2 = 1/z2
+    sike_fp2mul_mont(t0, t1, z3);                      // z3 = 1/z3
+    sike_fp2copy(t3, z1);                              // z1 = 1/z1
+}
+
+
+void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
+{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+  // Input:  the x-coordinates xP, xQ, and xR of the points P, Q and R.
+  // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x, i.e. A = (xP*xQ + xR*(xP+xQ) - 1)^2/(4*xP*xQ*xR) - (xP+xQ+xR).
+    f2elm_t t0, t1, one = F2ELM_INIT;
+
+    extern const struct params_t params;
+    sike_fpcopy(params.mont_one, one->c0);             // one->c0 = params.mont_one; one->c1 keeps its F2ELM_INIT value
+    sike_fp2add(xP, xQ, t1);                           // t1 = xP+xQ
+    sike_fp2mul_mont(xP, xQ, t0);                      // t0 = xP*xQ
+    sike_fp2mul_mont(xR, t1, A);                       // A = xR*t1
+    sike_fp2add(t0, A, A);                             // A = A+t0
+    sike_fp2mul_mont(t0, xR, t0);                      // t0 = t0*xR
+    sike_fp2sub(A, one, A);                            // A = A-1
+    sike_fp2add(t0, t0, t0);                           // t0 = t0+t0
+    sike_fp2add(t1, xR, t1);                           // t1 = t1+xR
+    sike_fp2add(t0, t0, t0);                           // t0 = t0+t0
+    sike_fp2sqr_mont(A, A);                            // A = A^2
+    sike_fp2inv_mont(t0);                              // t0 = 1/t0
+    sike_fp2mul_mont(A, t0, A);                        // A = A*t0
+    sike_fp2sub(A, t1, A);                             // Afinal = A-t1
+}
+
+
+void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
+{ // Computes the j-invariant of a Montgomery curve with projective constant.
+  // Input: A,C in GF(p^2).
+  // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
+    f2elm_t t0, t1;
+
+    sike_fp2sqr_mont(A, jinv);                           // jinv = A^2
+    sike_fp2sqr_mont(C, t1);                             // t1 = C^2
+    sike_fp2add(t1, t1, t0);                             // t0 = t1+t1 = 2*C^2
+    sike_fp2sub(jinv, t0, t0);                           // t0 = jinv-t0 = A^2-2*C^2
+    sike_fp2sub(t0, t1, t0);                             // t0 = t0-t1 = A^2-3*C^2
+    sike_fp2sub(t0, t1, jinv);                           // jinv = t0-t1 = A^2-4*C^2
+    sike_fp2sqr_mont(t1, t1);                            // t1 = t1^2 = C^4
+    sike_fp2mul_mont(jinv, t1, jinv);                    // jinv = jinv*t1 = C^4*(A^2-4*C^2)
+    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0
+    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0 = 4*(A^2-3*C^2)
+    sike_fp2sqr_mont(t0, t1);                            // t1 = t0^2
+    sike_fp2mul_mont(t0, t1, t0);                        // t0 = t0*t1 = 64*(A^2-3*C^2)^3
+    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0
+    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0 = 256*(A^2-3*C^2)^3
+    sike_fp2inv_mont(jinv);                              // jinv = 1/jinv
+    sike_fp2mul_mont(jinv, t0, jinv);                    // jinv = t0*jinv
+}
+
+
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
+{ // Simultaneous doubling and differential addition. Both P and Q are updated in place.
+  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
+  // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
+    f2elm_t t0, t1, t2;
+
+    sike_fp2add(P->X, P->Z, t0);                         // t0 = XP+ZP
+    sike_fp2sub(P->X, P->Z, t1);                         // t1 = XP-ZP
+    sike_fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2
+    sike_fp2sub(Q->X, Q->Z, t2);                         // t2 = XQ-ZQ
+    sike_fp2correction(t2);                              // apply sike_fpcorrection to both coordinates of t2
+    sike_fp2add(Q->X, Q->Z, Q->X);                       // XQ = XQ+ZQ
+    sike_fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)
+    sike_fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2
+    sike_fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)
+    sike_fp2sub(P->X, P->Z, t2);                         // t2 = (XP+ZP)^2-(XP-ZP)^2
+    sike_fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2
+    sike_fp2mul_mont(t2, A24, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]  (Q->X used as scratch here)
+    sike_fp2sub(t0, t1, Q->Z);                           // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+    sike_fp2add(Q->X, P->Z, P->Z);                       // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
+    sike_fp2add(t0, t1, Q->X);                           // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+    sike_fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
+    sike_fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+    sike_fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+    sike_fp2mul_mont(Q->Z, xPQ, Q->Z);                   // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+}
diff --git a/src/kem/sike/sike-p434-sha256/isogeny.h b/src/kem/sike/sike-p434-sha256/isogeny.h
new file mode 100644
index 00000000..460c8c66
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/isogeny.h
@@ -0,0 +1,49 @@
+#ifndef ISOGENY_H_
+#define ISOGENY_H_
+
+// Computes [2^e](X:Z) on Montgomery curve with projective
+// constant via e repeated doublings.
+void xDBLe(
+    const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
+    const f2elm_t C24, size_t e);
+// Simultaneous doubling and differential addition: P <- 2*P and Q <- P+Q, both in place.
+void xDBLADD(
+    point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
+    const f2elm_t A24);
+// Tripling of a Montgomery point in projective coordinates (X:Z).
+void xTPL(
+    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
+    const f2elm_t A24plus);
+// Computes [3^e](X:Z) on Montgomery curve with projective constant
+// via e repeated triplings.
+void xTPLe(
+    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
+    const f2elm_t A24plus, size_t e);
+// Given the x-coordinates of P, Q, and R, returns the value A
+// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+void get_A(
+    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
+// Computes the j-invariant of a Montgomery curve with projective constant.
+void j_inv(
+    const f2elm_t A, const f2elm_t C, f2elm_t jinv);
+// Computes the corresponding 4-isogeny of a projective Montgomery
+// point (X4:Z4) of order 4.
+void get_4_isog(
+    const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
+// Computes the corresponding 3-isogeny of a projective Montgomery
+// point (X3:Z3) of order 3.
+void get_3_isog(
+    const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
+    f2elm_t* coeff);
+// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3)
+// of order 3 on a Montgomery curve and a point P with coefficients given in coeff.
+void eval_3_isog(
+    point_proj_t Q, f2elm_t* coeff);
+// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
+void eval_4_isog(
+    point_proj_t P, f2elm_t* coeff);
+// 3-way simultaneous inversion
+void inv_3_way(
+    f2elm_t z1, f2elm_t z2, f2elm_t z3);
+
+#endif // ISOGENY_H_
diff --git a/src/kem/sike/sike-p434-sha256/params.c b/src/kem/sike/sike-p434-sha256/params.c
new file mode 100644
index 00000000..b13f4c87
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/params.c
@@ -0,0 +1,128 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P434
+*********************************************************************************************/
+
+#include "utils.h"
+
+// Parameters for isogeny system "SIKE" over p434; multi-word values are stored least significant word first.
+const struct params_t params = {
+    .prime = {
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF),
+        U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
+        U64_TO_WORDS(0x0002341F27177344)
+    },
+    .prime_p1 = {
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000),
+        U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
+        U64_TO_WORDS(0x0002341F27177344)
+    },
+    .prime_x2 = {
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF),
+        U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC),
+        U64_TO_WORDS(0x0004683E4E2EE688)
+    },
+    .A_gen = {
+        U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B),
+        U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1),
+        U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F),
+        U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0
+        U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B),
+        U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA),
+        U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C),
+        U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1
+        U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6),
+        U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03),
+        U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215),
+        U64_TO_WORDS(0x0001C4CB77542876), // XQA0
+        U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050),
+        U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374),
+        U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508),
+        U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1
+        U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611),
+        U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD),
+        U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3),
+        U64_TO_WORDS(0x00022A81D8D55643), // XRA0
+        U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED),
+        U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4),
+        U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2),
+        U64_TO_WORDS(0x0000B87FC716C0C6)  // XRA1
+    },
+    .B_gen = {
+        U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05),
+        U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F),
+        U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9),
+        U64_TO_WORDS(0x0001BED4772E551F), // XPB0
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), // XPB1
+        U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C),
+        U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62),
+        U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F),
+        U64_TO_WORDS(0x000034080181D8AE), // XQB0
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), // XQB1
+        U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647),
+        U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D),
+        U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504),
+        U64_TO_WORDS(0x00007E8A50F02E37), // XRB0
+        U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230),
+        U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53),
+        U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B),
+        U64_TO_WORDS(0x000173FA910377D3)  // XRB1
+    },
+    .mont_R2 = {
+        U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2),
+        U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B),
+        U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A),
+        U64_TO_WORDS(0x000025A89BCDD12A)
+    },
+    .mont_one = {
+        U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000),
+        U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C),
+        U64_TO_WORDS(0x0000ECEEA7BD2EDA)
+    },
+    .mont_six = {
+        U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000),
+        U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0),
+        U64_TO_WORDS(0x00012559A0403298)
+    },
+    .A_strat = {
+        0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+        0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01,
+        0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02,
+        0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01,
+        0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
+        0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01,
+        0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03,
+        0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04,
+        0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01
+    },
+    .B_strat = {
+        0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01,
+        0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01,
+        0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02,
+        0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10,
+        0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01,
+        0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+        0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01,
+        0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
+        0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02,
+        0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01,
+        0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+        0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01
+    }
+};
diff --git a/src/kem/sike/sike-p434-sha256/sike.c b/src/kem/sike/sike-p434-sha256/sike.c
new file mode 100644
index 00000000..f00ebe76
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/sike.c
@@ -0,0 +1,517 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
+*********************************************************************************************/
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sha2/sha256.h>
+#include <random/randombytes.h>
+
+#include "utils.h"
+#include "isogeny.h"
+#include "fpx.h"
+
+extern const struct params_t params;
+
+// SIDH_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
+#define SIDH_JINV_BYTESZ    110U
+// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
+#define SIDH_PRV_A_BITSZ    216U
+// SIDH_PRV_B_BITSZ is a number of bits of SIDH private key (3-isogeny)
+#define SIDH_PRV_B_BITSZ    217U
+// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
+#define MAX_INT_POINTS_ALICE    7U
+// MAX_INT_POINTS_BOB is a number of points used in 3-isogeny tree computation
+#define MAX_INT_POINTS_BOB      8U
+
+// Portable, branch-free conditional swap of two projective points.
+// option = 0 leaves P and Q untouched; option = 0xFF...FF exchanges P and Q.
+#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
+static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
+{
+    crypto_word_t diff; // masked XOR difference of the current word pair
+    for (size_t w = 0; w < NWORDS_FIELD; w++) {
+        diff = (P->X->c0[w] ^ Q->X->c0[w]) & option;
+        P->X->c0[w] ^= diff;
+        Q->X->c0[w] ^= diff;
+        diff = (P->Z->c0[w] ^ Q->Z->c0[w]) & option;
+        P->Z->c0[w] ^= diff;
+        Q->Z->c0[w] ^= diff;
+        diff = (P->X->c1[w] ^ Q->X->c1[w]) & option;
+        P->X->c1[w] ^= diff;
+        Q->X->c1[w] ^= diff;
+        diff = (P->Z->c1[w] ^ Q->Z->c1[w]) & option;
+        P->Z->c1[w] ^= diff;
+        Q->Z->c1[w] ^= diff;
+    }
+}
+#endif
+
+// Conditionally exchange P and Q through whichever backend this build selects.
+// option = 0 keeps P and Q as they are; option = 0xFF...FF makes P <- Q and Q <- P.
+static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
+{
+#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
+    sike_cswap(P, Q, option);
+#else
+    sike_cswap_asm(P, Q, option);
+#endif
+}
+
+static void ladder3Pt( // x-only ladder driven by the bits of m; the result is left in R
+    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
+    int is_A, point_proj_t R, const f2elm_t A) {
+    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
+    f2elm_t A24 = F2ELM_INIT;
+    crypto_word_t mask;
+    int bit, swap, prevbit = 0;
+    // Number of scalar bits to process: SIDH_PRV_A_BITSZ if is_A is non-zero, else SIDH_PRV_B_BITSZ.
+    const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;
+
+    // Initializing constant: A24 goes 1 -> 2 -> A+2 -> (A+2)/2 -> (A+2)/4
+    sike_fpcopy(params.mont_one, A24[0].c0);
+    sike_fp2add(A24, A24, A24);
+    sike_fp2add(A, A24, A24);
+    sike_fp2div2(A24, A24);
+    sike_fp2div2(A24, A24); // A24 = (A+2)/4
+
+    // Initializing points: R0 = (xQ:1), R2 = (xPQ:1), R = (xP:1)
+    sike_fp2copy(xQ, R0->X);
+    sike_fpcopy(params.mont_one, R0->Z[0].c0);
+    sike_fp2copy(xPQ, R2->X);
+    sike_fpcopy(params.mont_one, R2->Z[0].c0);
+    sike_fp2copy(xP, R->X);
+    sike_fpcopy(params.mont_one, R->Z[0].c0);
+    memset(R->Z->c1, 0, sizeof(R->Z->c1)); // R is caller-provided storage, so clear c1 of Z explicitly
+
+    // Main loop: bits of m are consumed from the least significant bit of m[0] upwards
+    for (size_t i = 0; i < nbits; i++) {
+        bit = (m[i >> 3] >> (i & 7)) & 1;
+        swap = bit ^ prevbit; // swap only when this bit differs from the previous one
+        prevbit = bit;
+        mask = 0 - (crypto_word_t)swap; // 0 or all ones
+
+        sike_fp2cswap(R, R2, mask);
+        xDBLADD(R0, R2, R->X, A24);
+        sike_fp2mul_mont(R2->X, R->Z, R2->X);
+    }
+    swap = 0 ^ prevbit; // final swap depends on the last processed bit alone
+    mask = 0 - (crypto_word_t)swap;
+    sike_fp2cswap(R, R2, mask);
+}
+
+// Unpack three GF(p^2) x-coordinates (c0 then c1 each) from 6*NWORDS_FIELD consecutive words.
+static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
+    sike_fpcopy(&gen[0*NWORDS_FIELD], XP[0].c0);
+    sike_fpcopy(&gen[1*NWORDS_FIELD], XP[0].c1);
+    sike_fpcopy(&gen[2*NWORDS_FIELD], XQ[0].c0);
+    sike_fpcopy(&gen[3*NWORDS_FIELD], XQ[0].c1);
+    sike_fpcopy(&gen[4*NWORDS_FIELD], XR[0].c0);
+    sike_fpcopy(&gen[5*NWORDS_FIELD], XR[0].c1);
+}
+
+// Serialize x: convert out of Montgomery form, then write c0 followed by c1, FIELD_BYTESZ bytes each.
+static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
+    f2elm_t std;
+    uint8_t *const re = enc, *const im = enc + FIELD_BYTESZ;
+    sike_from_fp2mont(x, std);
+    // little endian: output byte k is taken from word k/LSZ, shifted right by 8*(k%LSZ) bits
+    for (size_t k = 0; k < FIELD_BYTESZ; k++) {
+        re[k] = (uint8_t)((std->c0[k / LSZ] >> ((k % LSZ) * 8)) & 0xFF);
+        im[k] = (uint8_t)((std->c1[k / LSZ] >> ((k % LSZ) * 8)) & 0xFF);
+    }
+}
+
+// Parse a byte sequence back into a GF(p^2) element and convert it to Montgomery representation.
+// Each coordinate occupies FIELD_BYTESZ octets in little endian order (least significant
+// octet at the lowest address); the octets of c0 come first, those of c1 follow.
+static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
+    const uint8_t *const re = enc, *const im = enc + FIELD_BYTESZ;
+    memset(t->c0, 0, sizeof(t->c0));
+    memset(t->c1, 0, sizeof(t->c1));
+    for (size_t k = 0; k < FIELD_BYTESZ; k++) { // OR every octet into its slot of the word array
+        t->c0[k / LSZ] |= (crypto_word_t)re[k] << ((k % LSZ) * 8);
+        t->c1[k / LSZ] |= (crypto_word_t)im[k] << ((k % LSZ) * 8);
+    }
+    sike_to_fp2mont(t, t);
+}
+
+// Alice's ephemeral public key generation
+// Input:  a private key skA; ladder3Pt consumes its low SIDH_PRV_A_BITSZ (216) bits.
+// Output: the public key pkA: 3 GF(p^2) elements, each encoded in SIDH_JINV_BYTESZ (110) bytes.
+static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_ALICE]; // R: kernel point; pts: points saved while walking the tree
+    point_proj_t phiP = POINT_PROJ_INIT;
+    point_proj_t phiQ = POINT_PROJ_INIT;
+    point_proj_t phiR = POINT_PROJ_INIT;
+    f2elm_t XPA, XQA, XRA, coeff[3];
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t C24 = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
+
+    // Initialize basis points: A_gen feeds the ladder, B_gen is carried through the isogeny
+    sike_init_basis(params.A_gen, XPA, XQA, XRA);
+    sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X);
+    sike_fpcopy(params.mont_one, (phiP->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiR->Z)->c0);
+
+    // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
+    sike_fpcopy(params.mont_one, A24plus->c0);
+    sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2
+    sike_fp2add(A24plus, A24plus, C24);     // C24 = 4
+    sike_fp2add(A24plus, C24, A);           // A = 6
+    sike_fp2add(C24, C24, A24plus);         // A24plus = 8
+
+    // Retrieve kernel point
+    ladder3Pt(XPA, XQA, XRA, skA, 1, R, A);
+
+    // Traverse tree; params.A_strat supplies the step size m for every descent
+    index = 0;
+    for (size_t row = 1; row < A_max; row++) {
+        while (index < A_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X); // save the current point before moving on
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.A_strat[ii++];
+            xDBLe(R, R, A24plus, C24, (2*m)); // 2*m doublings
+            index += m;
+        }
+        get_4_isog(R, A24plus, C24, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_4_isog(pts[i], coeff);
+        }
+        eval_4_isog(phiP, coeff);
+        eval_4_isog(phiQ, coeff);
+        eval_4_isog(phiR, coeff);
+
+        sike_fp2copy(pts[npts-1]->X, R->X); // continue from the most recently saved point
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_4_isog(R, A24plus, C24, coeff);
+    eval_4_isog(phiP, coeff);
+    eval_4_isog(phiQ, coeff);
+    eval_4_isog(phiR, coeff);
+
+    inv_3_way(phiP->Z, phiQ->Z, phiR->Z); // 3-way simultaneous inversion of the Z values
+    sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
+    sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+    sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+    // Format public key: three encoded X values, SIDH_JINV_BYTESZ bytes apart
+    sike_fp2_encode(phiP->X, pkA);
+    sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ);
+    sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ);
+}
+
+// Bob's ephemeral public key generation
+// It takes an existing private key skB and computes the public key pkB.
+// ladder3Pt consumes the low SIDH_PRV_B_BITSZ (217) bits of skB.
+// The public key consists of 3 GF(p^2) elements, each encoded in SIDH_JINV_BYTESZ (110) bytes.
+static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_BOB]; // R: kernel point; pts: points saved while walking the tree
+    point_proj_t phiP = POINT_PROJ_INIT;
+    point_proj_t phiQ = POINT_PROJ_INIT;
+    point_proj_t phiR = POINT_PROJ_INIT;
+    f2elm_t XPB, XQB, XRB, coeff[3];
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t A24minus = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+
+    // Initialize basis points: B_gen feeds the ladder, A_gen is carried through the isogeny
+    sike_init_basis(params.B_gen, XPB, XQB, XRB);
+    sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X);
+    sike_fpcopy(params.mont_one, (phiP->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiR->Z)->c0);
+
+    // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
+    sike_fpcopy(params.mont_one, A24plus->c0);
+    sike_fp2add(A24plus, A24plus, A24plus);   // A24plus = 2
+    sike_fp2add(A24plus, A24plus, A24minus);  // A24minus = 4
+    sike_fp2add(A24plus, A24minus, A);        // A = 6
+    sike_fp2add(A24minus, A24minus, A24plus); // A24plus = 8
+
+    // Retrieve kernel point
+    ladder3Pt(XPB, XQB, XRB, skB, 0, R, A);
+
+    // Traverse tree; params.B_strat supplies the step size m for every descent
+    index = 0;
+    for (size_t row = 1; row < B_max; row++) {
+        while (index < B_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X); // save the current point before moving on
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.B_strat[ii++];
+            xTPLe(R, R, A24minus, A24plus, m); // m triplings
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_3_isog(pts[i], coeff);
+        }
+        eval_3_isog(phiP, coeff);
+        eval_3_isog(phiQ, coeff);
+        eval_3_isog(phiR, coeff);
+
+        sike_fp2copy(pts[npts-1]->X, R->X); // continue from the most recently saved point
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_3_isog(R, A24minus, A24plus, coeff);
+    eval_3_isog(phiP, coeff);
+    eval_3_isog(phiQ, coeff);
+    eval_3_isog(phiR, coeff);
+
+    inv_3_way(phiP->Z, phiQ->Z, phiR->Z); // 3-way simultaneous inversion of the Z values
+    sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
+    sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+    sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+    // Format public key: three encoded X values, SIDH_JINV_BYTESZ bytes apart
+    sike_fp2_encode(phiP->X, pkB);
+    sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ);
+    sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ);
+}
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
+// Inputs: Alice's skA; ladder3Pt consumes its low SIDH_PRV_A_BITSZ (216) bits.
+//         Bob's pkB consists of 3 GF(p^2) elements, SIDH_JINV_BYTESZ (110) bytes each.
+// Output: a shared secret ssA: one GF(p^2) element (the j-invariant) encoded by sike_fp2_encode.
+static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_ALICE]; // R: kernel point; pts: points saved while walking the tree
+    f2elm_t coeff[3], PKB[3], jinv;
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t C24 = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
+
+    // Initialize images of Bob's basis
+    fp2_decode(pkB, PKB[0]);
+    fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]);
+    fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]);
+
+    // Initialize constants: A is recovered from the public key, A24plus = A+2, C24 = 4
+    get_A(PKB[0], PKB[1], PKB[2], A);
+    sike_fpadd(params.mont_one, params.mont_one, C24->c0); // C24 = 2 for now
+    sike_fp2add(A, C24, A24plus);
+    sike_fpadd(C24->c0, C24->c0, C24->c0);
+
+    // Retrieve kernel point
+    ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A);
+
+    // Traverse tree; params.A_strat supplies the step size m for every descent
+    index = 0;
+    for (size_t row = 1; row < A_max; row++) {
+        while (index < A_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X); // save the current point before moving on
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.A_strat[ii++];
+            xDBLe(R, R, A24plus, C24, (2*m)); // 2*m doublings
+            index += m;
+        }
+        get_4_isog(R, A24plus, C24, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_4_isog(pts[i], coeff);
+        }
+
+        sike_fp2copy(pts[npts-1]->X, R->X); // continue from the most recently saved point
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_4_isog(R, A24plus, C24, coeff);
+    sike_fp2add(A24plus, A24plus, A24plus); // next three lines: A24plus <- 4*A24plus - 2*C24
+    sike_fp2sub(A24plus, C24, A24plus);
+    sike_fp2add(A24plus, A24plus, A24plus);
+    j_inv(A24plus, C24, jinv);
+    sike_fp2_encode(jinv, ssA);
+}
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
+// Inputs: Bob's skB; ladder3Pt consumes its low SIDH_PRV_B_BITSZ (217) bits.
+//         Alice's pkA consists of 3 GF(p^2) elements, SIDH_JINV_BYTESZ (110) bytes each.
+// Output: a shared secret ssB: one GF(p^2) element (the j-invariant) encoded by sike_fp2_encode.
+static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_BOB]; // R: kernel point; pts: points saved while walking the tree
+    f2elm_t coeff[3], PKB[3], jinv; // despite its name, PKB holds the decoded pkA here
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t A24minus = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+
+    // Initialize images of Alice's basis
+    fp2_decode(pkA, PKB[0]);
+    fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]);
+    fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]);
+
+    // Initialize constants: A is recovered from the public key, A24plus = A+2, A24minus = A-2
+    get_A(PKB[0], PKB[1], PKB[2], A);
+    sike_fpadd(params.mont_one, params.mont_one, A24minus->c0); // A24minus = 2 for now
+    sike_fp2add(A, A24minus, A24plus);
+    sike_fp2sub(A, A24minus, A24minus);
+
+    // Retrieve kernel point
+    ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A);
+
+    // Traverse tree; params.B_strat supplies the step size m for every descent
+    index = 0;
+    for (size_t row = 1; row < B_max; row++) {
+        while (index < B_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X); // save the current point before moving on
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.B_strat[ii++];
+            xTPLe(R, R, A24minus, A24plus, m); // m triplings
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_3_isog(pts[i], coeff);
+        }
+
+        sike_fp2copy(pts[npts-1]->X, R->X); // continue from the most recently saved point
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_3_isog(R, A24minus, A24plus, coeff);
+    sike_fp2add(A24plus, A24minus, A);       // A = A24plus + A24minus
+    sike_fp2add(A, A, A);                    // A = 2*(A24plus + A24minus)
+    sike_fp2sub(A24plus, A24minus, A24plus); // A24plus = A24plus - A24minus
+    j_inv(A, A24plus, jinv);
+    sike_fp2_encode(jinv, ssB);
+}
+
+int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
+                 uint8_t out_pub[SIKE_PUB_BYTESZ]) {
+  // Bob's (3-isogeny) scalar: ladder3Pt consumes SIDH_PRV_B_BITSZ bits of it, so
+  // set the top one of those bits and clear the unused bits above it in that byte.
+  const size_t msb = SIDH_PRV_B_BITSZ - 1U; // index of the most significant key bit
+  randombytes(out_priv, SIKE_PRV_BYTESZ);
+  out_priv[msb / 8] = (uint8_t)((out_priv[msb / 8] | (1U << (msb % 8))) & ((2U << (msb % 8)) - 1U));
+  gen_iso_B(out_priv, out_pub);
+  return 1; // unconditional: no failure path in this function
+}
+
+void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
+                 uint8_t out_ciphertext[SIKE_CT_BYTESZ],
+                 const uint8_t pub_key[SIKE_PUB_BYTESZ]) {
+  // Secret buffer is reused by the function to store some ephemeral
+  // secret data. It receives SHA-256 digests and is read as Alice's scalar,
+  // as the mask for the message and as the source of the shared key.
+  uint8_t secret[32]; // written by sha256_final three times below
+  uint8_t j[SIDH_JINV_BYTESZ];
+  uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ]; // only the first SIKE_MSG_BYTESZ bytes (m) are used
+  SHA256_CTX ctx;
+
+  // Generate secret key for A
+  // secret key A = SHA256(m || pub_key), where m is SIKE_MSG_BYTESZ random bytes
+  randombytes(temp, SIKE_MSG_BYTESZ);
+
+  sha256_init(&ctx);
+  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
+  sha256_update(&ctx, pub_key, SIKE_PUB_BYTESZ);
+  sha256_final(&ctx, secret);
+
+  // Generate public key for A - first part of the ciphertext
+  gen_iso_A(secret, out_ciphertext);
+
+  // Generate c1:
+  //  h = SHA256(j-invariant)
+  // c1 = h ^ m
+  ex_iso_A(secret, pub_key, j);
+  sha256_init(&ctx);
+  sha256_update(&ctx, j, sizeof(j));
+  sha256_final(&ctx, secret);
+
+  // c1 = h ^ m, stored right after the SIKE_PUB_BYTESZ bytes of the public key
+  uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ];
+  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+    c1[i] = temp[i] ^ secret[i];
+  }
+
+  sha256_init(&ctx);
+  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
+  sha256_update(&ctx, out_ciphertext, SIKE_CT_BYTESZ);
+  sha256_final(&ctx, secret);
+  // Shared secret out_shared_key = first SIKE_SS_BYTESZ bytes of SHA256(m||out_ciphertext)
+  memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
+}
+
+void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
+                 const uint8_t ciphertext[SIKE_CT_BYTESZ],
+                 const uint8_t pub_key[SIKE_PUB_BYTESZ],
+                 const uint8_t priv_key[SIKE_PRV_BYTESZ]) {
+  // Secret buffer is reused by the function to store some ephemeral
+  // secret data. It receives SHA-256 digests and is read as the message mask,
+  // as the re-derived scalar for A and as the source of the shared key.
+  uint8_t secret[32];
+  uint8_t j[SIDH_JINV_BYTESZ];
+  uint8_t c0[SIKE_PUB_BYTESZ];       // recomputed first part of the ciphertext
+  uint8_t temp[SIKE_MSG_BYTESZ];     // recovered message m
+  uint8_t shared_nok[SIKE_MSG_BYTESZ]; // random stand-in for m, used when the check below fails
+  SHA256_CTX ctx;
+
+  // NOTE(review): upstream justified this with "only ephemeral keys in BoringSSL" - confirm for this tree
+  randombytes(shared_nok, SIKE_MSG_BYTESZ);
+
+  // Recover m
+  // Let ciphertext = c0 || c1 - both have fixed sizes
+  // m = F(j-invariant(c0, priv_key)) ^ c1
+  ex_iso_B(priv_key, ciphertext, j);
+
+  sha256_init(&ctx);
+  sha256_update(&ctx, j, sizeof(j));
+  sha256_final(&ctx, secret);
+
+  const uint8_t *c1 = &ciphertext[sizeof(c0)];
+  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+    temp[i] = c1[i] ^ secret[i];
+  }
+
+  sha256_init(&ctx);
+  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
+  sha256_update(&ctx, pub_key, SIKE_PUB_BYTESZ);
+  sha256_final(&ctx, secret);
+
+  // Recover c0 = public key A and compare it with the received one
+  gen_iso_A(secret, c0);
+  crypto_word_t ok = ct_uint_eq(
+    ct_mem_eq(c0, ciphertext, SIKE_PUB_BYTESZ), 1);
+  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+    temp[i] = ct_select_8(ok, temp[i], shared_nok[i]); // presumably keeps temp[i] when ok is set - see utils.h
+  }
+
+  sha256_init(&ctx);
+  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
+  sha256_update(&ctx, ciphertext, SIKE_CT_BYTESZ);
+  sha256_final(&ctx, secret);
+
+  // Shared secret out_shared_key = first SIKE_SS_BYTESZ bytes of SHA256(m||ciphertext)
+  memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
+}
diff --git a/src/kem/sike/sike-p434-sha256/utils.h b/src/kem/sike/sike-p434-sha256/utils.h
new file mode 100644
index 00000000..87623d33
--- /dev/null
+++ b/src/kem/sike/sike-p434-sha256/utils.h
@@ -0,0 +1,231 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: internal header file for P434
+*********************************************************************************************/
+
+#ifndef UTILS_H_
+#define UTILS_H_
+
+#include <stddef.h>
+#include <sike/sike.h>
+
+// Conversion macro from number of bits to number of bytes
+#define BITS_TO_BYTES(nbits)      (((nbits)+7)/8)
+
+// Bit size of the field
+#define BITS_FIELD              434
+// Byte size of the field
+#define FIELD_BYTESZ            BITS_TO_BYTES(BITS_FIELD)
+// Number of 64-bit words of a 224-bit element
+#define NBITS_ORDER             224
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)
+// Number of elements in Alice's strategy
+#define A_max                   108
+// Number of elements in Bob's strategy
+#define B_max                   137
+// Word size size
+#define RADIX                   sizeof(crypto_word_t)*8
+// Byte size of a limb
+#define LSZ                     sizeof(crypto_word_t)
+
+#if defined(CPU_64_BIT)
+    typedef uint64_t crypto_word_t;
+    // Number of words of a 434-bit field element
+    #define NWORDS_FIELD    7
+    // Number of "0" digits in the least significant part of p434 + 1
+    #define ZERO_WORDS 3
+    // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
+    #define U64_TO_WORDS(x) UINT64_C(x)
+#else
+    typedef uint32_t crypto_word_t;
+    // Number of words of a 434-bit field element
+    #define NWORDS_FIELD    14
+    // Number of "0" digits in the least significant part of p434 + 1
+    #define ZERO_WORDS 6
+    // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
+    #define U64_TO_WORDS(x) \
+        (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32)
+#endif
+
+// Extended datatype support
+#if !defined(HAS_UINT128)
+    typedef uint64_t uint128_t[2];
+#endif
+
+// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
+// Digit multiplication
+#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));
+
+// If mask |x|==0xff.ff set |x| to 1, otherwise 0
+#define M2B(x) ((x)>>(RADIX-1))
+
+// Digit addition with carry
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                   \
+do {                                                                        \
+  crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn);             \
+  (sumOut) = (addend2) + tempReg;                                           \
+  (carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) |  \
+                   ct_uint_lt((sumOut), tempReg));                  \
+} while(0)
+
+// Digit subtraction with borrow
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)           \
+do {                                                                            \
+    crypto_word_t tempReg = (minuend) - (subtrahend);                           \
+    crypto_word_t borrowReg = M2B(ct_uint_lt((minuend), (subtrahend))); \
+    borrowReg |= ((borrowIn) & ct_uint_eq(tempReg, 0));               \
+    (differenceOut) = tempReg - (crypto_word_t)(borrowIn);                      \
+    (borrowOut) = borrowReg;                                                    \
+} while(0)
+
+/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
+   which violates C11 as described in 6.7.9, 21 (similarily C99, 6.7.8).
+   Defines below are used to work around the bug, and provide a way
+   to initialize f2elem_t and point_proj_t structs.
+   Bug has been fixed in GCC6 (debian stretch).
+*/
+#define F2ELM_INIT {{ {0}, {0} }}
+#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}
+
+// Datatype for representing 434-bit field elements (448-bit max.)
+// Elements over GF(p434) are encoded in 63 octets in little endian format
+// (i.e., the least significant octet is located in the lowest memory address).
+typedef crypto_word_t felm_t[NWORDS_FIELD];
+
+// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e.
+// Fp2 element = c0 + c1*i in F_{p^2}
+// Datatype for representing double-precision 2x434-bit field elements (448-bit max.)
+// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are
+// encoded as {a, b}, with a in the lowest memory portion.
+typedef struct {
+    felm_t c0;
+    felm_t c1;
+} fp2;
+
+// Our F_{p^2} element type is a pointer to the struct.
+typedef fp2 f2elm_t[1];
+
+// Datatype for representing double-precision 2x434-bit
+// field elements in contiguous memory.
+typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];
+
+// Constants used during SIKE computation.
+struct params_t {
+    // Stores a prime
+    const crypto_word_t prime[NWORDS_FIELD];
+    // Stores prime + 1
+    const crypto_word_t prime_p1[NWORDS_FIELD];
+    // Stores prime * 2
+    const crypto_word_t prime_x2[NWORDS_FIELD];
+    // Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i}
+    // in GF(prime^2), expressed in Montgomery representation
+    const crypto_word_t A_gen[6*NWORDS_FIELD];
+    // Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i}
+    // in GF(prime^2), expressed in Montgomery representation
+    const crypto_word_t B_gen[6*NWORDS_FIELD];
+    // Montgomery constant mont_R2 = (2^448)^2 mod prime
+    const crypto_word_t mont_R2[NWORDS_FIELD];
+    // Value 'one' in Montgomery representation
+    const crypto_word_t mont_one[NWORDS_FIELD];
+    // Value '6' in Montgomery representation
+    const crypto_word_t mont_six[NWORDS_FIELD];
+    // Fixed parameters for isogeny tree computation
+    const unsigned int A_strat[A_max-1];
+    const unsigned int B_strat[B_max-1];
+};
+
+// Point representation in projective XZ Montgomery coordinates.
+typedef struct {
+    f2elm_t X;
+    f2elm_t Z;
+} point_proj;
+typedef point_proj point_proj_t[1];
+
+// Checks whether two words are equal. Returns 1 in case it is,
+// otherwise 0.
+static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
+{
+    // if x==y then t = 0
+    crypto_word_t t = x ^ y;
+    // if x!=y t will have first bit set
+    t = (t >> 1) - t;
+    // return MSB - 1 in case x==y, otherwise 0
+    return ((~t) >> (RADIX-1));
+}
+// Constant time select.
+// if pick == 1 (out = in1)
+// if pick == 0 (out = in2)
+// else out is undefined
+static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
+    uint8_t mask = ((int8_t)(flag << 7))>>7;
+    return (in1&mask) | (in2&(~mask));
+}
+
+// Constant time memcmp. Returns 1 if p==q, otherwise 0
+static inline int ct_mem_eq(const void *p, const void *q, size_t n)
+{
+  const uint8_t *pp = (uint8_t*)p, *qq = (uint8_t*)q;
+  uint8_t a = 0;
+
+  while (n--) a |= *pp++ ^ *qq++;
+  return (ct_uint_eq(a, 0));
+}
+
+/*
+// Returns 1 if x<y, otherwise 0
+static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y) {
+  const crypto_word_t t1 = x^y;
+  const crypto_word_t t2 = x - y;
+  const crypto_word_t tt = x ^ (t1 | (t2^y));
+  return (tt >> (RADIX-1));
+}
+*/
+
+/// OZAPTF: coppied from boringssl
+static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
+  return 0u - (a >> (sizeof(a) * 8 - 1));
+}
+
+// constant_time_lt_w returns 0xff..f if a < b and 0 otherwise.
+static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
+{
+  /*
+  const crypto_word_t t1 = x^y;
+  const crypto_word_t t2 = x - y;
+  const crypto_word_t tt = x ^ (t1 | (t2^y));
+  return (tt >> (RADIX-1));
+  */
+  // Consider the two cases of the problem:
+  //   msb(a) == msb(b): a < b iff the MSB of a - b is set.
+  //   msb(a) != msb(b): a < b iff the MSB of b is set.
+  //
+  // If msb(a) == msb(b) then the following evaluates as:
+  //   msb(a^((a^b)|((a-b)^a))) ==
+  //   msb(a^((a-b) ^ a))       ==   (because msb(a^b) == 0)
+  //   msb(a^a^(a-b))           ==   (rearranging)
+  //   msb(a-b)                      (because ∀x. x^x == 0)
+  //
+  // Else, if msb(a) != msb(b) then the following evaluates as:
+  //   msb(a^((a^b)|((a-b)^a))) ==
+  //   msb(a^(𝟙 | ((a-b)^a)))   ==   (because msb(a^b) == 1 and 𝟙
+  //                                  represents a value s.t. msb(𝟙) = 1)
+  //   msb(a^𝟙)                 ==   (because ORing with 1 results in 1)
+  //   msb(b)
+  //
+  //
+  // Here is an SMT-LIB verification of this formula:
+  //
+  // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
+  //   (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
+  // )
+  //
+  // (declare-fun a () (_ BitVec 32))
+  // (declare-fun b () (_ BitVec 32))
+  //
+  // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
+  // (check-sat)
+  // (get-model)
+  return constant_time_msb_w(x^((x^y)|((x-y)^x)));
+}
+#endif // UTILS_H_
-- 
2.39.5


From 1096d2b87e58de568ad2e5d2109a53d48ff42667 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Fri, 9 Apr 2021 00:44:27 +0100
Subject: [PATCH 02/12] update sike

---
 CMakeLists.txt                                |    1 +
 public/pqc/pqc.h                              |    3 +-
 src/capi/pqapi.c                              |  118 +-
 src/capi/schemes.h                            |  118 ++
 src/kem/sike/includes/sike/sike.h             |   73 ++
 src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S | 1095 -----------------
 .../sike/sike-p434-sha256/asm/fp_generic.c    |  179 ---
 src/kem/sike/sike-p434-sha256/fpx.c           |  282 -----
 src/kem/sike/sike-p434-sha256/fpx.h           |  112 --
 src/kem/sike/sike-p434-sha256/isogeny.c       |  262 ----
 src/kem/sike/sike-p434-sha256/isogeny.h       |   49 -
 src/kem/sike/sike-p434-sha256/params.c        |  128 --
 src/kem/sike/sike-p434-sha256/sike.c          |  517 --------
 src/kem/sike/sike-p434-sha256/utils.h         |  231 ----
 src/rustapi/pqc-sys/src/bindings.rs           |    3 +-
 test/katrunner/src/main.rs                    |    1 +
 16 files changed, 198 insertions(+), 2974 deletions(-)
 create mode 100644 src/capi/schemes.h
 create mode 100644 src/kem/sike/includes/sike/sike.h
 delete mode 100644 src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S
 delete mode 100644 src/kem/sike/sike-p434-sha256/asm/fp_generic.c
 delete mode 100644 src/kem/sike/sike-p434-sha256/fpx.c
 delete mode 100644 src/kem/sike/sike-p434-sha256/fpx.h
 delete mode 100644 src/kem/sike/sike-p434-sha256/isogeny.c
 delete mode 100644 src/kem/sike/sike-p434-sha256/isogeny.h
 delete mode 100644 src/kem/sike/sike-p434-sha256/params.c
 delete mode 100644 src/kem/sike/sike-p434-sha256/sike.c
 delete mode 100644 src/kem/sike/sike-p434-sha256/utils.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7ee78b71..313200b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -148,6 +148,7 @@ add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean)
 add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean)
 add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean)
 add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean)
+add_subdirectory(src/kem/sike)
 
 # Hardware optimized targets
 if(${ARCH} STREQUAL "ARCH_x86_64")
diff --git a/public/pqc/pqc.h b/public/pqc/pqc.h
index 8fd651a3..9cb862af 100644
--- a/public/pqc/pqc.h
+++ b/public/pqc/pqc.h
@@ -63,7 +63,8 @@ extern "C" {
     _(SABER)             \
     _(HQCRMRS128)        \
     _(HQCRMRS192)        \
-    _(HQCRMRS256)
+    _(HQCRMRS256)        \
+    _(SIKE434)
 
 // Defines IDs for each algorithm. The
 // PQC_ALG_SIG/KEM_MAX indicates number
diff --git a/src/capi/pqapi.c b/src/capi/pqapi.c
index bd41aa4f..d00260d3 100644
--- a/src/capi/pqapi.c
+++ b/src/capi/pqapi.c
@@ -2,123 +2,7 @@
 #include <stdbool.h>
 #include <pqc/pqc.h>
 
-// PQClean include
-#include "sign/rainbow/rainbowV-classic/clean/api.h"
-#include "sign/rainbow/rainbowI-classic/clean/api.h"
-#include "sign/rainbow/rainbowIII-classic/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h"
-#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h"
-#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h"
-#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h"
-#include "sign/falcon/falcon-1024/clean/api.h"
-#include "sign/falcon/falcon-1024/avx2/api.h"
-#include "sign/falcon/falcon-512/clean/api.h"
-#include "sign/falcon/falcon-512/avx2/api.h"
-#include "sign/dilithium/dilithium2/clean/api.h"
-#include "sign/dilithium/dilithium2/avx2/api.h"
-#include "sign/dilithium/dilithium3/clean/api.h"
-#include "sign/dilithium/dilithium3/avx2/api.h"
-#include "sign/dilithium/dilithium5/clean/api.h"
-#include "sign/dilithium/dilithium5/avx2/api.h"
-#include "kem/ntru/ntruhps4096821/clean/api.h"
-#include "kem/ntru/ntruhps4096821/avx2/api.h"
-#include "kem/ntru/ntruhps2048509/clean/api.h"
-#include "kem/ntru/ntruhps2048509/avx2/api.h"
-#include "kem/ntru/ntruhrss701/clean/api.h"
-#include "kem/ntru/ntruhrss701/avx2/api.h"
-#include "kem/ntru/ntruhps2048677/clean/api.h"
-#include "kem/ntru/ntruhps2048677/avx2/api.h"
-#include "kem/ntru_prime/ntrulpr761/clean/api.h"
-#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
-#include "kem/ntru_prime/ntrulpr653/clean/api.h"
-#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
-#include "kem/ntru_prime/ntrulpr857/clean/api.h"
-#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
-#include "kem/kyber/kyber768/clean/api.h"
-#include "kem/kyber/kyber768/avx2/api.h"
-#include "kem/kyber/kyber1024/clean/api.h"
-#include "kem/kyber/kyber1024/avx2/api.h"
-#include "kem/kyber/kyber512/clean/api.h"
-#include "kem/kyber/kyber512/avx2/api.h"
-#include "kem/mceliece/mceliece460896f/avx/api.h"
-#include "kem/mceliece/mceliece460896f/clean/api.h"
-#include "kem/mceliece/mceliece8192128/avx/api.h"
-#include "kem/mceliece/mceliece8192128/clean/api.h"
-#include "kem/mceliece/mceliece6688128f/avx/api.h"
-#include "kem/mceliece/mceliece6688128f/clean/api.h"
-#include "kem/mceliece/mceliece8192128f/avx/api.h"
-#include "kem/mceliece/mceliece8192128f/clean/api.h"
-#include "kem/mceliece/mceliece6960119f/avx/api.h"
-#include "kem/mceliece/mceliece6960119f/clean/api.h"
-#include "kem/mceliece/mceliece460896/avx/api.h"
-#include "kem/mceliece/mceliece460896/clean/api.h"
-#include "kem/mceliece/mceliece6688128/avx/api.h"
-#include "kem/mceliece/mceliece6688128/clean/api.h"
-#include "kem/mceliece/mceliece348864f/avx/api.h"
-#include "kem/mceliece/mceliece348864f/clean/api.h"
-#include "kem/mceliece/mceliece6960119/avx/api.h"
-#include "kem/mceliece/mceliece6960119/clean/api.h"
-#include "kem/mceliece/mceliece348864/avx/api.h"
-#include "kem/mceliece/mceliece348864/clean/api.h"
-#include "kem/frodo/frodokem976shake/clean/api.h"
-#include "kem/frodo/frodokem1344shake/clean/api.h"
-#include "kem/frodo/frodokem640shake/clean/api.h"
-#include "kem/saber/lightsaber/clean/api.h"
-#include "kem/saber/lightsaber/avx2/api.h"
-#include "kem/saber/firesaber/clean/api.h"
-#include "kem/saber/firesaber/avx2/api.h"
-#include "kem/saber/saber/clean/api.h"
-#include "kem/saber/saber/avx2/api.h"
-#include "kem/hqc/hqc-rmrs-128/clean/api.h"
-#include "kem/hqc/hqc-rmrs-192/clean/api.h"
-#include "kem/hqc/hqc-rmrs-256/clean/api.h"
-#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
-#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
-#include "kem/hqc/hqc-rmrs-256/avx2/api.h"
+#include "schemes.h"
 
 // not proud of this thingy
 #define OPT_VERSION _CLEAN_
diff --git a/src/capi/schemes.h b/src/capi/schemes.h
new file mode 100644
index 00000000..60a68893
--- /dev/null
+++ b/src/capi/schemes.h
@@ -0,0 +1,118 @@
+// PQClean include
+#include "sign/rainbow/rainbowV-classic/clean/api.h"
+#include "sign/rainbow/rainbowI-classic/clean/api.h"
+#include "sign/rainbow/rainbowIII-classic/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-192f-simple/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-192f-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-256f-simple/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-256f-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-192f-robust/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-192f-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-128f-simple/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-128f-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-256s-simple/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-256s-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-128s-simple/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-128s-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-128f-robust/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-128f-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-192s-robust/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-192s-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-128f-robust/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-128f-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-128s-robust/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-128s-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-256s-robust/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-256s-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-192s-simple/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-192s-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-192s-simple/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-192s-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-192s-robust/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-192s-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-192f-simple/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-192f-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-256s-simple/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-256s-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-128s-simple/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-128s-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-shake256-256f-robust/clean/api.h"
+#include "sign/sphincs/sphincs-shake256-256f-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-256f-robust/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-256f-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-256f-simple/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-256f-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-256s-robust/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-256s-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-128s-robust/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-128s-robust/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-128f-simple/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-128f-simple/avx2/api.h"
+#include "sign/sphincs/sphincs-sha256-192f-robust/clean/api.h"
+#include "sign/sphincs/sphincs-sha256-192f-robust/avx2/api.h"
+#include "sign/falcon/falcon-1024/clean/api.h"
+#include "sign/falcon/falcon-1024/avx2/api.h"
+#include "sign/falcon/falcon-512/clean/api.h"
+#include "sign/falcon/falcon-512/avx2/api.h"
+#include "sign/dilithium/dilithium2/clean/api.h"
+#include "sign/dilithium/dilithium2/avx2/api.h"
+#include "sign/dilithium/dilithium3/clean/api.h"
+#include "sign/dilithium/dilithium3/avx2/api.h"
+#include "sign/dilithium/dilithium5/clean/api.h"
+#include "sign/dilithium/dilithium5/avx2/api.h"
+#include "kem/ntru/ntruhps4096821/clean/api.h"
+#include "kem/ntru/ntruhps4096821/avx2/api.h"
+#include "kem/ntru/ntruhps2048509/clean/api.h"
+#include "kem/ntru/ntruhps2048509/avx2/api.h"
+#include "kem/ntru/ntruhrss701/clean/api.h"
+#include "kem/ntru/ntruhrss701/avx2/api.h"
+#include "kem/ntru/ntruhps2048677/clean/api.h"
+#include "kem/ntru/ntruhps2048677/avx2/api.h"
+#include "kem/ntru_prime/ntrulpr761/clean/api.h"
+#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
+#include "kem/ntru_prime/ntrulpr653/clean/api.h"
+#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
+#include "kem/ntru_prime/ntrulpr857/clean/api.h"
+#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
+#include "kem/kyber/kyber768/clean/api.h"
+#include "kem/kyber/kyber768/avx2/api.h"
+#include "kem/kyber/kyber1024/clean/api.h"
+#include "kem/kyber/kyber1024/avx2/api.h"
+#include "kem/kyber/kyber512/clean/api.h"
+#include "kem/kyber/kyber512/avx2/api.h"
+#include "kem/mceliece/mceliece460896f/avx/api.h"
+#include "kem/mceliece/mceliece460896f/clean/api.h"
+#include "kem/mceliece/mceliece8192128/avx/api.h"
+#include "kem/mceliece/mceliece8192128/clean/api.h"
+#include "kem/mceliece/mceliece6688128f/avx/api.h"
+#include "kem/mceliece/mceliece6688128f/clean/api.h"
+#include "kem/mceliece/mceliece8192128f/avx/api.h"
+#include "kem/mceliece/mceliece8192128f/clean/api.h"
+#include "kem/mceliece/mceliece6960119f/avx/api.h"
+#include "kem/mceliece/mceliece6960119f/clean/api.h"
+#include "kem/mceliece/mceliece460896/avx/api.h"
+#include "kem/mceliece/mceliece460896/clean/api.h"
+#include "kem/mceliece/mceliece6688128/avx/api.h"
+#include "kem/mceliece/mceliece6688128/clean/api.h"
+#include "kem/mceliece/mceliece348864f/avx/api.h"
+#include "kem/mceliece/mceliece348864f/clean/api.h"
+#include "kem/mceliece/mceliece6960119/avx/api.h"
+#include "kem/mceliece/mceliece6960119/clean/api.h"
+#include "kem/mceliece/mceliece348864/avx/api.h"
+#include "kem/mceliece/mceliece348864/clean/api.h"
+#include "kem/frodo/frodokem976shake/clean/api.h"
+#include "kem/frodo/frodokem1344shake/clean/api.h"
+#include "kem/frodo/frodokem640shake/clean/api.h"
+#include "kem/saber/lightsaber/clean/api.h"
+#include "kem/saber/lightsaber/avx2/api.h"
+#include "kem/saber/firesaber/clean/api.h"
+#include "kem/saber/firesaber/avx2/api.h"
+#include "kem/saber/saber/clean/api.h"
+#include "kem/saber/saber/avx2/api.h"
+#include "kem/hqc/hqc-rmrs-128/clean/api.h"
+#include "kem/hqc/hqc-rmrs-192/clean/api.h"
+#include "kem/hqc/hqc-rmrs-256/clean/api.h"
+#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
+#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
+#include "kem/hqc/hqc-rmrs-256/avx2/api.h"
+#include "kem/sike/includes/sike/sike.h"
\ No newline at end of file
diff --git a/src/kem/sike/includes/sike/sike.h b/src/kem/sike/includes/sike/sike.h
new file mode 100644
index 00000000..09d1e580
--- /dev/null
+++ b/src/kem/sike/includes/sike/sike.h
@@ -0,0 +1,73 @@
+#ifndef SIKE_H_
+#define SIKE_H_
+
+#include <stdint.h>
+#include <string.h>
+
+/* SIKE
+ *
+ * SIKE is an isogeny-based post-quantum key encapsulation mechanism. A description of the
+ * algorithm is provided in [SIKE]. This implementation uses a 434-bit field size. The code
+ * is based on "Additional_Implementations" from PQC NIST submission package which can
+ * be found here:
+ * https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip
+ *
+ * [SIKE] https://sike.org/files/SIDH-spec.pdf
+ */
+
+// SIKE_PUB_BYTESZ is the number of bytes in a public key.
+#define SIKE_PUB_BYTESZ 330
+// SIKE_PRV_BYTESZ is the number of bytes in a private key.
+#define SIKE_PRV_BYTESZ 28
+// SIKE_SS_BYTESZ is the number of bytes in a shared key.
+#define SIKE_SS_BYTESZ  16
+// SIKE_MSG_BYTESZ is the number of bytes in a random bit string concatenated
+// with the public key (see 1.4 of SIKE).
+#define SIKE_MSG_BYTESZ 16
+// SIKE_CT_BYTESZ is the number of bytes in a ciphertext.
+#define SIKE_CT_BYTESZ  (SIKE_PUB_BYTESZ + SIKE_MSG_BYTESZ)
+
+// SIKE_keypair outputs a public and secret key.  In case of success
+// function returns 1, otherwise 0.
+ int SIKE_keypair(
+    uint8_t out_priv[SIKE_PRV_BYTESZ],
+    uint8_t out_pub[SIKE_PUB_BYTESZ]);
+
+// SIKE_encaps generates and encrypts a random session key, writing those values to
+// |out_shared_key| and |out_ciphertext|, respectively.
+ void SIKE_encaps(
+    uint8_t out_shared_key[SIKE_SS_BYTESZ],
+    uint8_t out_ciphertext[SIKE_CT_BYTESZ],
+    const uint8_t pub_key[SIKE_PUB_BYTESZ]);
+
+// SIKE_decaps outputs a random session key, writing it to |out_shared_key|.
+ void SIKE_decaps(
+    uint8_t out_shared_key[SIKE_SS_BYTESZ],
+    const uint8_t ciphertext[SIKE_CT_BYTESZ],
+    const uint8_t pub_key[SIKE_PUB_BYTESZ],
+    const uint8_t priv_key[SIKE_PRV_BYTESZ]);
+
+// PQClean-style glue; stored secret key = priv || SIKE_MSG_BYTESZ gap || pub (see keypair/dec below)
+#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES  (SIKE_PRV_BYTESZ + SIKE_MSG_BYTESZ + SIKE_PUB_BYTESZ)
+#define PQCLEAN_SIKE434_CLEAN_CRYPTO_PUBLICKEYBYTES  SIKE_PUB_BYTESZ
+#define PQCLEAN_SIKE434_CLEAN_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
+#define PQCLEAN_SIKE434_CLEAN_CRYPTO_BYTES           SIKE_SS_BYTESZ
+#define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME         "SIKE/p434"
+
+static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
+	SIKE_keypair(sk, pk); // private key goes to the start of sk, public key to pk; the 1/0 status is discarded
+	memcpy(&sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], pk, SIKE_PUB_BYTESZ); // keep a public-key copy inside sk; sk must hold PRV+MSG+PUB bytes
+	return 1; // NOTE(review): always 1, even if SIKE_keypair failed; PQClean-style callers usually expect 0 on success - confirm
+}
+static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
+	SIKE_encaps(ss,ct,pk); // argument order differs: SIKE_encaps takes (shared key, ciphertext, public key)
+	return 1; // NOTE(review): unconditional 1; confirm which status convention the callers expect
+}
+
+static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
+	SIKE_decaps(ss, ct, &sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], sk); // public key is read from offset PRV+MSG inside sk, where keypair copied it
+	return 1; // NOTE(review): unconditional 1; confirm which status convention the callers expect
+}
+
+
+#endif
diff --git a/src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S b/src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S
deleted file mode 100644
index 4e2d7b74..00000000
--- a/src/kem/sike/sike-p434-sha256/asm/fp-x86_64.S
+++ /dev/null
@@ -1,1095 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-#endif
-
-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text	
-
-
-.Lp434x2:
-.quad	0xFFFFFFFFFFFFFFFE
-.quad	0xFFFFFFFFFFFFFFFF
-.quad	0xFB82ECF5C5FFFFFF
-.quad	0xF78CB8F062B15D47
-.quad	0xD9F8BFAD038A40AC
-.quad	0x0004683E4E2EE688
-
-
-.Lp434p1:
-.quad	0xFDC1767AE3000000
-.quad	0x7BC65C783158AEA3
-.quad	0x6CFC5FD681C52056
-.quad	0x0002341F27177344
-
-.globl	sike_fpadd
-.hidden sike_fpadd
-.type	sike_fpadd,@function
-sike_fpadd:
-.cfi_startproc	
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r12, -16
-	pushq	%r13
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r13, -24
-	pushq	%r14
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r14, -32
-
-	xorq	%rax,%rax
-
-	movq	0(%rdi),%r8
-	addq	0(%rsi),%r8
-	movq	8(%rdi),%r9
-	adcq	8(%rsi),%r9
-	movq	16(%rdi),%r10
-	adcq	16(%rsi),%r10
-	movq	24(%rdi),%r11
-	adcq	24(%rsi),%r11
-	movq	32(%rdi),%r12
-	adcq	32(%rsi),%r12
-	movq	40(%rdi),%r13
-	adcq	40(%rsi),%r13
-	movq	48(%rdi),%r14
-	adcq	48(%rsi),%r14
-
-	movq	.Lp434x2(%rip),%rcx
-	subq	%rcx,%r8
-	movq	8+.Lp434x2(%rip),%rcx
-	sbbq	%rcx,%r9
-	sbbq	%rcx,%r10
-	movq	16+.Lp434x2(%rip),%rcx
-	sbbq	%rcx,%r11
-	movq	24+.Lp434x2(%rip),%rcx
-	sbbq	%rcx,%r12
-	movq	32+.Lp434x2(%rip),%rcx
-	sbbq	%rcx,%r13
-	movq	40+.Lp434x2(%rip),%rcx
-	sbbq	%rcx,%r14
-
-	sbbq	$0,%rax
-
-	movq	.Lp434x2(%rip),%rdi
-	andq	%rax,%rdi
-	movq	8+.Lp434x2(%rip),%rsi
-	andq	%rax,%rsi
-	movq	16+.Lp434x2(%rip),%rcx
-	andq	%rax,%rcx
-
-	addq	%rdi,%r8
-	movq	%r8,0(%rdx)
-	adcq	%rsi,%r9
-	movq	%r9,8(%rdx)
-	adcq	%rsi,%r10
-	movq	%r10,16(%rdx)
-	adcq	%rcx,%r11
-	movq	%r11,24(%rdx)
-
-	setc	%cl
-	movq	24+.Lp434x2(%rip),%r8
-	andq	%rax,%r8
-	movq	32+.Lp434x2(%rip),%r9
-	andq	%rax,%r9
-	movq	40+.Lp434x2(%rip),%r10
-	andq	%rax,%r10
-	btq	$0,%rcx
-
-	adcq	%r8,%r12
-	movq	%r12,32(%rdx)
-	adcq	%r9,%r13
-	movq	%r13,40(%rdx)
-	adcq	%r10,%r14
-	movq	%r14,48(%rdx)
-
-	popq	%r14
-.cfi_adjust_cfa_offset	-8
-	popq	%r13
-.cfi_adjust_cfa_offset	-8
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-	.byte	0xf3,0xc3
-.cfi_endproc	
-.globl	sike_cswap_asm
-.hidden sike_cswap_asm
-.type	sike_cswap_asm,@function
-sike_cswap_asm:
-
-
-	movq	%rdx,%xmm3
-
-
-
-
-
-	pshufd	$68,%xmm3,%xmm3
-
-	movdqu	0(%rdi),%xmm0
-	movdqu	0(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,0(%rdi)
-	movdqu	%xmm1,0(%rsi)
-
-	movdqu	16(%rdi),%xmm0
-	movdqu	16(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,16(%rdi)
-	movdqu	%xmm1,16(%rsi)
-
-	movdqu	32(%rdi),%xmm0
-	movdqu	32(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,32(%rdi)
-	movdqu	%xmm1,32(%rsi)
-
-	movdqu	48(%rdi),%xmm0
-	movdqu	48(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,48(%rdi)
-	movdqu	%xmm1,48(%rsi)
-
-	movdqu	64(%rdi),%xmm0
-	movdqu	64(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,64(%rdi)
-	movdqu	%xmm1,64(%rsi)
-
-	movdqu	80(%rdi),%xmm0
-	movdqu	80(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,80(%rdi)
-	movdqu	%xmm1,80(%rsi)
-
-	movdqu	96(%rdi),%xmm0
-	movdqu	96(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,96(%rdi)
-	movdqu	%xmm1,96(%rsi)
-
-	movdqu	112(%rdi),%xmm0
-	movdqu	112(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,112(%rdi)
-	movdqu	%xmm1,112(%rsi)
-
-	movdqu	128(%rdi),%xmm0
-	movdqu	128(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,128(%rdi)
-	movdqu	%xmm1,128(%rsi)
-
-	movdqu	144(%rdi),%xmm0
-	movdqu	144(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,144(%rdi)
-	movdqu	%xmm1,144(%rsi)
-
-	movdqu	160(%rdi),%xmm0
-	movdqu	160(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,160(%rdi)
-	movdqu	%xmm1,160(%rsi)
-
-	movdqu	176(%rdi),%xmm0
-	movdqu	176(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,176(%rdi)
-	movdqu	%xmm1,176(%rsi)
-
-	movdqu	192(%rdi),%xmm0
-	movdqu	192(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,192(%rdi)
-	movdqu	%xmm1,192(%rsi)
-
-	movdqu	208(%rdi),%xmm0
-	movdqu	208(%rsi),%xmm1
-	movdqa	%xmm1,%xmm2
-	pxor	%xmm0,%xmm2
-	pand	%xmm3,%xmm2
-	pxor	%xmm2,%xmm0
-	pxor	%xmm2,%xmm1
-	movdqu	%xmm0,208(%rdi)
-	movdqu	%xmm1,208(%rsi)
-
-	.byte	0xf3,0xc3
-.globl	sike_fpsub
-.hidden sike_fpsub
-.type	sike_fpsub,@function
-sike_fpsub:
-.cfi_startproc	
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r12, -16
-	pushq	%r13
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r13, -24
-	pushq	%r14
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r14, -32
-
-	xorq	%rax,%rax
-
-	movq	0(%rdi),%r8
-	subq	0(%rsi),%r8
-	movq	8(%rdi),%r9
-	sbbq	8(%rsi),%r9
-	movq	16(%rdi),%r10
-	sbbq	16(%rsi),%r10
-	movq	24(%rdi),%r11
-	sbbq	24(%rsi),%r11
-	movq	32(%rdi),%r12
-	sbbq	32(%rsi),%r12
-	movq	40(%rdi),%r13
-	sbbq	40(%rsi),%r13
-	movq	48(%rdi),%r14
-	sbbq	48(%rsi),%r14
-
-	sbbq	$0x0,%rax
-
-	movq	.Lp434x2(%rip),%rdi
-	andq	%rax,%rdi
-	movq	8+.Lp434x2(%rip),%rsi
-	andq	%rax,%rsi
-	movq	16+.Lp434x2(%rip),%rcx
-	andq	%rax,%rcx
-
-	addq	%rdi,%r8
-	movq	%r8,0(%rdx)
-	adcq	%rsi,%r9
-	movq	%r9,8(%rdx)
-	adcq	%rsi,%r10
-	movq	%r10,16(%rdx)
-	adcq	%rcx,%r11
-	movq	%r11,24(%rdx)
-
-	setc	%cl
-	movq	24+.Lp434x2(%rip),%r8
-	andq	%rax,%r8
-	movq	32+.Lp434x2(%rip),%r9
-	andq	%rax,%r9
-	movq	40+.Lp434x2(%rip),%r10
-	andq	%rax,%r10
-	btq	$0x0,%rcx
-
-	adcq	%r8,%r12
-	adcq	%r9,%r13
-	adcq	%r10,%r14
-	movq	%r12,32(%rdx)
-	movq	%r13,40(%rdx)
-	movq	%r14,48(%rdx)
-
-	popq	%r14
-.cfi_adjust_cfa_offset	-8
-	popq	%r13
-.cfi_adjust_cfa_offset	-8
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-	.byte	0xf3,0xc3
-.cfi_endproc	
-.globl	sike_mpadd_asm
-.hidden sike_mpadd_asm
-.type	sike_mpadd_asm,@function
-sike_mpadd_asm:
-.cfi_startproc	
-	movq	0(%rdi),%r8;
-	movq	8(%rdi),%r9
-	movq	16(%rdi),%r10
-	movq	24(%rdi),%r11
-	movq	32(%rdi),%rcx
-	addq	0(%rsi),%r8
-	adcq	8(%rsi),%r9
-	adcq	16(%rsi),%r10
-	adcq	24(%rsi),%r11
-	adcq	32(%rsi),%rcx
-	movq	%r8,0(%rdx)
-	movq	%r9,8(%rdx)
-	movq	%r10,16(%rdx)
-	movq	%r11,24(%rdx)
-	movq	%rcx,32(%rdx)
-
-	movq	40(%rdi),%r8
-	movq	48(%rdi),%r9
-	adcq	40(%rsi),%r8
-	adcq	48(%rsi),%r9
-	movq	%r8,40(%rdx)
-	movq	%r9,48(%rdx)
-	.byte	0xf3,0xc3
-.cfi_endproc	
-.globl	sike_mpsubx2_asm
-.hidden sike_mpsubx2_asm
-.type	sike_mpsubx2_asm,@function
-sike_mpsubx2_asm:
-.cfi_startproc	
-	xorq	%rax,%rax
-
-	movq	0(%rdi),%r8
-	movq	8(%rdi),%r9
-	movq	16(%rdi),%r10
-	movq	24(%rdi),%r11
-	movq	32(%rdi),%rcx
-	subq	0(%rsi),%r8
-	sbbq	8(%rsi),%r9
-	sbbq	16(%rsi),%r10
-	sbbq	24(%rsi),%r11
-	sbbq	32(%rsi),%rcx
-	movq	%r8,0(%rdx)
-	movq	%r9,8(%rdx)
-	movq	%r10,16(%rdx)
-	movq	%r11,24(%rdx)
-	movq	%rcx,32(%rdx)
-
-	movq	40(%rdi),%r8
-	movq	48(%rdi),%r9
-	movq	56(%rdi),%r10
-	movq	64(%rdi),%r11
-	movq	72(%rdi),%rcx
-	sbbq	40(%rsi),%r8
-	sbbq	48(%rsi),%r9
-	sbbq	56(%rsi),%r10
-	sbbq	64(%rsi),%r11
-	sbbq	72(%rsi),%rcx
-	movq	%r8,40(%rdx)
-	movq	%r9,48(%rdx)
-	movq	%r10,56(%rdx)
-	movq	%r11,64(%rdx)
-	movq	%rcx,72(%rdx)
-
-	movq	80(%rdi),%r8
-	movq	88(%rdi),%r9
-	movq	96(%rdi),%r10
-	movq	104(%rdi),%r11
-	sbbq	80(%rsi),%r8
-	sbbq	88(%rsi),%r9
-	sbbq	96(%rsi),%r10
-	sbbq	104(%rsi),%r11
-	sbbq	$0x0,%rax
-	movq	%r8,80(%rdx)
-	movq	%r9,88(%rdx)
-	movq	%r10,96(%rdx)
-	movq	%r11,104(%rdx)
-	.byte	0xf3,0xc3
-.cfi_endproc	
-.globl	sike_mpdblsubx2_asm
-.hidden sike_mpdblsubx2_asm
-.type	sike_mpdblsubx2_asm,@function
-sike_mpdblsubx2_asm:
-.cfi_startproc	
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r12, -16
-	pushq	%r13
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r13, -24
-
-	xorq	%rax,%rax
-
-
-	movq	0(%rdx),%r8
-	movq	8(%rdx),%r9
-	movq	16(%rdx),%r10
-	movq	24(%rdx),%r11
-	movq	32(%rdx),%r12
-	movq	40(%rdx),%r13
-	movq	48(%rdx),%rcx
-	subq	0(%rdi),%r8
-	sbbq	8(%rdi),%r9
-	sbbq	16(%rdi),%r10
-	sbbq	24(%rdi),%r11
-	sbbq	32(%rdi),%r12
-	sbbq	40(%rdi),%r13
-	sbbq	48(%rdi),%rcx
-	adcq	$0x0,%rax
-
-
-	subq	0(%rsi),%r8
-	sbbq	8(%rsi),%r9
-	sbbq	16(%rsi),%r10
-	sbbq	24(%rsi),%r11
-	sbbq	32(%rsi),%r12
-	sbbq	40(%rsi),%r13
-	sbbq	48(%rsi),%rcx
-	adcq	$0x0,%rax
-
-
-	movq	%r8,0(%rdx)
-	movq	%r9,8(%rdx)
-	movq	%r10,16(%rdx)
-	movq	%r11,24(%rdx)
-	movq	%r12,32(%rdx)
-	movq	%r13,40(%rdx)
-	movq	%rcx,48(%rdx)
-
-
-	movq	56(%rdx),%r8
-	movq	64(%rdx),%r9
-	movq	72(%rdx),%r10
-	movq	80(%rdx),%r11
-	movq	88(%rdx),%r12
-	movq	96(%rdx),%r13
-	movq	104(%rdx),%rcx
-
-	subq	%rax,%r8
-	sbbq	56(%rdi),%r8
-	sbbq	64(%rdi),%r9
-	sbbq	72(%rdi),%r10
-	sbbq	80(%rdi),%r11
-	sbbq	88(%rdi),%r12
-	sbbq	96(%rdi),%r13
-	sbbq	104(%rdi),%rcx
-
-
-	subq	56(%rsi),%r8
-	sbbq	64(%rsi),%r9
-	sbbq	72(%rsi),%r10
-	sbbq	80(%rsi),%r11
-	sbbq	88(%rsi),%r12
-	sbbq	96(%rsi),%r13
-	sbbq	104(%rsi),%rcx
-
-
-	movq	%r8,56(%rdx)
-	movq	%r9,64(%rdx)
-	movq	%r10,72(%rdx)
-	movq	%r11,80(%rdx)
-	movq	%r12,88(%rdx)
-	movq	%r13,96(%rdx)
-	movq	%rcx,104(%rdx)
-
-	popq	%r13
-.cfi_adjust_cfa_offset	-8
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-	.byte	0xf3,0xc3
-.cfi_endproc	
-
-.globl	sike_fprdc
-.hidden sike_fprdc
-.type	sike_fprdc,@function
-sike_fprdc:
-.cfi_startproc	
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r12, -16
-	pushq	%r13
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r13, -24
-	pushq	%r14
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r14, -32
-	pushq	%r15
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r15, -40
-
-	xorq	%rax,%rax
-	movq	0+0(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r8,%r9
-	mulxq	8+.Lp434p1(%rip),%r12,%r10
-	mulxq	16+.Lp434p1(%rip),%r13,%r11
-
-	adoxq	%r12,%r9
-	adoxq	%r13,%r10
-
-	mulxq	24+.Lp434p1(%rip),%r13,%r12
-	adoxq	%r13,%r11
-	adoxq	%rax,%r12
-
-	xorq	%rax,%rax
-	movq	0+8(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r13,%rcx
-	adcxq	%r13,%r9
-	adcxq	%rcx,%r10
-
-	mulxq	8+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%r13,%r11
-	adoxq	%rcx,%r10
-
-	mulxq	16+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%r13,%r12
-	adoxq	%rcx,%r11
-
-	mulxq	24+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%rax,%r13
-	adoxq	%rcx,%r12
-	adoxq	%rax,%r13
-
-	xorq	%rcx,%rcx
-	addq	24(%rdi),%r8
-	adcq	32(%rdi),%r9
-	adcq	40(%rdi),%r10
-	adcq	48(%rdi),%r11
-	adcq	56(%rdi),%r12
-	adcq	64(%rdi),%r13
-	adcq	72(%rdi),%rcx
-	movq	%r8,24(%rdi)
-	movq	%r9,32(%rdi)
-	movq	%r10,40(%rdi)
-	movq	%r11,48(%rdi)
-	movq	%r12,56(%rdi)
-	movq	%r13,64(%rdi)
-	movq	%rcx,72(%rdi)
-	movq	80(%rdi),%r8
-	movq	88(%rdi),%r9
-	movq	96(%rdi),%r10
-	movq	104(%rdi),%r11
-	adcq	$0x0,%r8
-	adcq	$0x0,%r9
-	adcq	$0x0,%r10
-	adcq	$0x0,%r11
-	movq	%r8,80(%rdi)
-	movq	%r9,88(%rdi)
-	movq	%r10,96(%rdi)
-	movq	%r11,104(%rdi)
-
-	xorq	%rax,%rax
-	movq	16+0(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r8,%r9
-	mulxq	8+.Lp434p1(%rip),%r12,%r10
-	mulxq	16+.Lp434p1(%rip),%r13,%r11
-
-	adoxq	%r12,%r9
-	adoxq	%r13,%r10
-
-	mulxq	24+.Lp434p1(%rip),%r13,%r12
-	adoxq	%r13,%r11
-	adoxq	%rax,%r12
-
-	xorq	%rax,%rax
-	movq	16+8(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r13,%rcx
-	adcxq	%r13,%r9
-	adcxq	%rcx,%r10
-
-	mulxq	8+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%r13,%r11
-	adoxq	%rcx,%r10
-
-	mulxq	16+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%r13,%r12
-	adoxq	%rcx,%r11
-
-	mulxq	24+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%rax,%r13
-	adoxq	%rcx,%r12
-	adoxq	%rax,%r13
-
-	xorq	%rcx,%rcx
-	addq	40(%rdi),%r8
-	adcq	48(%rdi),%r9
-	adcq	56(%rdi),%r10
-	adcq	64(%rdi),%r11
-	adcq	72(%rdi),%r12
-	adcq	80(%rdi),%r13
-	adcq	88(%rdi),%rcx
-	movq	%r8,40(%rdi)
-	movq	%r9,48(%rdi)
-	movq	%r10,56(%rdi)
-	movq	%r11,64(%rdi)
-	movq	%r12,72(%rdi)
-	movq	%r13,80(%rdi)
-	movq	%rcx,88(%rdi)
-	movq	96(%rdi),%r8
-	movq	104(%rdi),%r9
-	adcq	$0x0,%r8
-	adcq	$0x0,%r9
-	movq	%r8,96(%rdi)
-	movq	%r9,104(%rdi)
-
-	xorq	%rax,%rax
-	movq	32+0(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r8,%r9
-	mulxq	8+.Lp434p1(%rip),%r12,%r10
-	mulxq	16+.Lp434p1(%rip),%r13,%r11
-
-	adoxq	%r12,%r9
-	adoxq	%r13,%r10
-
-	mulxq	24+.Lp434p1(%rip),%r13,%r12
-	adoxq	%r13,%r11
-	adoxq	%rax,%r12
-
-	xorq	%rax,%rax
-	movq	32+8(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r13,%rcx
-	adcxq	%r13,%r9
-	adcxq	%rcx,%r10
-
-	mulxq	8+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%r13,%r11
-	adoxq	%rcx,%r10
-
-	mulxq	16+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%r13,%r12
-	adoxq	%rcx,%r11
-
-	mulxq	24+.Lp434p1(%rip),%rcx,%r13
-	adcxq	%rax,%r13
-	adoxq	%rcx,%r12
-	adoxq	%rax,%r13
-
-	xorq	%rcx,%rcx
-	addq	56(%rdi),%r8
-	adcq	64(%rdi),%r9
-	adcq	72(%rdi),%r10
-	adcq	80(%rdi),%r11
-	adcq	88(%rdi),%r12
-	adcq	96(%rdi),%r13
-	adcq	104(%rdi),%rcx
-	movq	%r8,0(%rsi)
-	movq	%r9,8(%rsi)
-	movq	%r10,72(%rdi)
-	movq	%r11,80(%rdi)
-	movq	%r12,88(%rdi)
-	movq	%r13,96(%rdi)
-	movq	%rcx,104(%rdi)
-
-	xorq	%rax,%rax
-	movq	48(%rdi),%rdx
-	mulxq	0+.Lp434p1(%rip),%r8,%r9
-	mulxq	8+.Lp434p1(%rip),%r12,%r10
-	mulxq	16+.Lp434p1(%rip),%r13,%r11
-
-	adoxq	%r12,%r9
-	adoxq	%r13,%r10
-
-	mulxq	24+.Lp434p1(%rip),%r13,%r12
-	adoxq	%r13,%r11
-	adoxq	%rax,%r12
-
-	addq	72(%rdi),%r8
-	adcq	80(%rdi),%r9
-	adcq	88(%rdi),%r10
-	adcq	96(%rdi),%r11
-	adcq	104(%rdi),%r12
-	movq	%r8,16(%rsi)
-	movq	%r9,24(%rsi)
-	movq	%r10,32(%rsi)
-	movq	%r11,40(%rsi)
-	movq	%r12,48(%rsi)
-
-
-	popq	%r15
-.cfi_adjust_cfa_offset	-8
-	popq	%r14
-.cfi_adjust_cfa_offset	-8
-	popq	%r13
-.cfi_adjust_cfa_offset	-8
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-	.byte	0xf3,0xc3
-.cfi_endproc	
-.globl	sike_mpmul
-.hidden sike_mpmul
-.type	sike_mpmul,@function
-sike_mpmul:
-.cfi_startproc	
-	pushq	%r12
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r12, -16
-	pushq	%r13
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r13, -24
-	pushq	%r14
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r14, -32
-	pushq	%r15
-.cfi_adjust_cfa_offset	8
-.cfi_offset	r15, -40
-
-
-	movq	%rdx,%rcx
-	xorq	%rax,%rax
-
-
-	movq	0(%rdi),%r8
-	movq	8(%rdi),%r9
-	movq	16(%rdi),%r10
-	movq	24(%rdi),%r11
-
-	pushq	%rbx
-.cfi_adjust_cfa_offset	8
-.cfi_offset	rbx, -48
-	pushq	%rbp
-.cfi_offset	rbp, -56
-.cfi_adjust_cfa_offset	8
-	subq	$96,%rsp
-.cfi_adjust_cfa_offset	96
-
-	addq	32(%rdi),%r8
-	adcq	40(%rdi),%r9
-	adcq	48(%rdi),%r10
-	adcq	$0x0,%r11
-	sbbq	$0x0,%rax
-	movq	%r8,0(%rsp)
-	movq	%r9,8(%rsp)
-	movq	%r10,16(%rsp)
-	movq	%r11,24(%rsp)
-
-
-	xorq	%rbx,%rbx
-	movq	0(%rsi),%r12
-	movq	8(%rsi),%r13
-	movq	16(%rsi),%r14
-	movq	24(%rsi),%r15
-	addq	32(%rsi),%r12
-	adcq	40(%rsi),%r13
-	adcq	48(%rsi),%r14
-	adcq	$0x0,%r15
-	sbbq	$0x0,%rbx
-	movq	%r12,32(%rsp)
-	movq	%r13,40(%rsp)
-	movq	%r14,48(%rsp)
-	movq	%r15,56(%rsp)
-
-
-	andq	%rax,%r12
-	andq	%rax,%r13
-	andq	%rax,%r14
-	andq	%rax,%r15
-
-
-	andq	%rbx,%r8
-	andq	%rbx,%r9
-	andq	%rbx,%r10
-	andq	%rbx,%r11
-
-
-	addq	%r12,%r8
-	adcq	%r13,%r9
-	adcq	%r14,%r10
-	adcq	%r15,%r11
-	movq	%r8,64(%rsp)
-	movq	%r9,72(%rsp)
-	movq	%r10,80(%rsp)
-	movq	%r11,88(%rsp)
-
-
-	movq	0+0(%rsp),%rdx
-	mulxq	32+0(%rsp),%r9,%r8
-	movq	%r9,0+0(%rsp)
-	mulxq	32+8(%rsp),%r10,%r9
-	xorq	%rax,%rax
-	adoxq	%r10,%r8
-	mulxq	32+16(%rsp),%r11,%r10
-	adoxq	%r11,%r9
-	mulxq	32+24(%rsp),%r12,%r11
-	adoxq	%r12,%r10
-
-	movq	0+8(%rsp),%rdx
-	mulxq	32+0(%rsp),%r12,%r13
-	adoxq	%rax,%r11
-	xorq	%rax,%rax
-	mulxq	32+8(%rsp),%r15,%r14
-	adoxq	%r8,%r12
-	movq	%r12,0+8(%rsp)
-	adcxq	%r15,%r13
-	mulxq	32+16(%rsp),%rbx,%r15
-	adcxq	%rbx,%r14
-	adoxq	%r9,%r13
-	mulxq	32+24(%rsp),%rbp,%rbx
-	adcxq	%rbp,%r15
-	adcxq	%rax,%rbx
-	adoxq	%r10,%r14
-
-	movq	0+16(%rsp),%rdx
-	mulxq	32+0(%rsp),%r8,%r9
-	adoxq	%r11,%r15
-	adoxq	%rax,%rbx
-	xorq	%rax,%rax
-	mulxq	32+8(%rsp),%r11,%r10
-	adoxq	%r13,%r8
-	movq	%r8,0+16(%rsp)
-	adcxq	%r11,%r9
-	mulxq	32+16(%rsp),%r12,%r11
-	adcxq	%r12,%r10
-	adoxq	%r14,%r9
-	mulxq	32+24(%rsp),%rbp,%r12
-	adcxq	%rbp,%r11
-	adcxq	%rax,%r12
-
-	adoxq	%r15,%r10
-	adoxq	%rbx,%r11
-	adoxq	%rax,%r12
-
-	movq	0+24(%rsp),%rdx
-	mulxq	32+0(%rsp),%r8,%r13
-	xorq	%rax,%rax
-	mulxq	32+8(%rsp),%r15,%r14
-	adcxq	%r15,%r13
-	adoxq	%r8,%r9
-	mulxq	32+16(%rsp),%rbx,%r15
-	adcxq	%rbx,%r14
-	adoxq	%r13,%r10
-	mulxq	32+24(%rsp),%rbp,%rbx
-	adcxq	%rbp,%r15
-	adcxq	%rax,%rbx
-	adoxq	%r14,%r11
-	adoxq	%r15,%r12
-	adoxq	%rax,%rbx
-	movq	%r9,0+24(%rsp)
-	movq	%r10,0+32(%rsp)
-	movq	%r11,0+40(%rsp)
-	movq	%r12,0+48(%rsp)
-	movq	%rbx,0+56(%rsp)
-
-
-
-	movq	0+0(%rdi),%rdx
-	mulxq	0+0(%rsi),%r9,%r8
-	movq	%r9,0+0(%rcx)
-	mulxq	0+8(%rsi),%r10,%r9
-	xorq	%rax,%rax
-	adoxq	%r10,%r8
-	mulxq	0+16(%rsi),%r11,%r10
-	adoxq	%r11,%r9
-	mulxq	0+24(%rsi),%r12,%r11
-	adoxq	%r12,%r10
-
-	movq	0+8(%rdi),%rdx
-	mulxq	0+0(%rsi),%r12,%r13
-	adoxq	%rax,%r11
-	xorq	%rax,%rax
-	mulxq	0+8(%rsi),%r15,%r14
-	adoxq	%r8,%r12
-	movq	%r12,0+8(%rcx)
-	adcxq	%r15,%r13
-	mulxq	0+16(%rsi),%rbx,%r15
-	adcxq	%rbx,%r14
-	adoxq	%r9,%r13
-	mulxq	0+24(%rsi),%rbp,%rbx
-	adcxq	%rbp,%r15
-	adcxq	%rax,%rbx
-	adoxq	%r10,%r14
-
-	movq	0+16(%rdi),%rdx
-	mulxq	0+0(%rsi),%r8,%r9
-	adoxq	%r11,%r15
-	adoxq	%rax,%rbx
-	xorq	%rax,%rax
-	mulxq	0+8(%rsi),%r11,%r10
-	adoxq	%r13,%r8
-	movq	%r8,0+16(%rcx)
-	adcxq	%r11,%r9
-	mulxq	0+16(%rsi),%r12,%r11
-	adcxq	%r12,%r10
-	adoxq	%r14,%r9
-	mulxq	0+24(%rsi),%rbp,%r12
-	adcxq	%rbp,%r11
-	adcxq	%rax,%r12
-
-	adoxq	%r15,%r10
-	adoxq	%rbx,%r11
-	adoxq	%rax,%r12
-
-	movq	0+24(%rdi),%rdx
-	mulxq	0+0(%rsi),%r8,%r13
-	xorq	%rax,%rax
-	mulxq	0+8(%rsi),%r15,%r14
-	adcxq	%r15,%r13
-	adoxq	%r8,%r9
-	mulxq	0+16(%rsi),%rbx,%r15
-	adcxq	%rbx,%r14
-	adoxq	%r13,%r10
-	mulxq	0+24(%rsi),%rbp,%rbx
-	adcxq	%rbp,%r15
-	adcxq	%rax,%rbx
-	adoxq	%r14,%r11
-	adoxq	%r15,%r12
-	adoxq	%rax,%rbx
-	movq	%r9,0+24(%rcx)
-	movq	%r10,0+32(%rcx)
-	movq	%r11,0+40(%rcx)
-	movq	%r12,0+48(%rcx)
-	movq	%rbx,0+56(%rcx)
-
-
-
-	movq	32+0(%rdi),%rdx
-	mulxq	32+0(%rsi),%r9,%r8
-	movq	%r9,64+0(%rcx)
-	mulxq	32+8(%rsi),%r10,%r9
-	xorq	%rax,%rax
-	adoxq	%r10,%r8
-	mulxq	32+16(%rsi),%r11,%r10
-	adoxq	%r11,%r9
-
-	movq	32+8(%rdi),%rdx
-	mulxq	32+0(%rsi),%r12,%r11
-	adoxq	%rax,%r10
-	xorq	%rax,%rax
-
-	mulxq	32+8(%rsi),%r14,%r13
-	adoxq	%r8,%r12
-	movq	%r12,64+8(%rcx)
-	adcxq	%r14,%r11
-
-	mulxq	32+16(%rsi),%r8,%r14
-	adoxq	%r9,%r11
-	adcxq	%r8,%r13
-	adcxq	%rax,%r14
-	adoxq	%r10,%r13
-
-	movq	32+16(%rdi),%rdx
-	mulxq	32+0(%rsi),%r8,%r9
-	adoxq	%rax,%r14
-	xorq	%rax,%rax
-
-	mulxq	32+8(%rsi),%r10,%r12
-	adoxq	%r11,%r8
-	movq	%r8,64+16(%rcx)
-	adcxq	%r13,%r9
-
-	mulxq	32+16(%rsi),%r11,%r8
-	adcxq	%r14,%r12
-	adcxq	%rax,%r8
-	adoxq	%r10,%r9
-	adoxq	%r12,%r11
-	adoxq	%rax,%r8
-	movq	%r9,64+24(%rcx)
-	movq	%r11,64+32(%rcx)
-	movq	%r8,64+40(%rcx)
-
-
-
-
-	movq	64(%rsp),%r8
-	movq	72(%rsp),%r9
-	movq	80(%rsp),%r10
-	movq	88(%rsp),%r11
-
-	movq	32(%rsp),%rax
-	addq	%rax,%r8
-	movq	40(%rsp),%rax
-	adcq	%rax,%r9
-	movq	48(%rsp),%rax
-	adcq	%rax,%r10
-	movq	56(%rsp),%rax
-	adcq	%rax,%r11
-
-
-	movq	0(%rsp),%r12
-	movq	8(%rsp),%r13
-	movq	16(%rsp),%r14
-	movq	24(%rsp),%r15
-	subq	0(%rcx),%r12
-	sbbq	8(%rcx),%r13
-	sbbq	16(%rcx),%r14
-	sbbq	24(%rcx),%r15
-	sbbq	32(%rcx),%r8
-	sbbq	40(%rcx),%r9
-	sbbq	48(%rcx),%r10
-	sbbq	56(%rcx),%r11
-
-
-	subq	64(%rcx),%r12
-	sbbq	72(%rcx),%r13
-	sbbq	80(%rcx),%r14
-	sbbq	88(%rcx),%r15
-	sbbq	96(%rcx),%r8
-	sbbq	104(%rcx),%r9
-	sbbq	$0x0,%r10
-	sbbq	$0x0,%r11
-
-	addq	32(%rcx),%r12
-	movq	%r12,32(%rcx)
-	adcq	40(%rcx),%r13
-	movq	%r13,40(%rcx)
-	adcq	48(%rcx),%r14
-	movq	%r14,48(%rcx)
-	adcq	56(%rcx),%r15
-	movq	%r15,56(%rcx)
-	adcq	64(%rcx),%r8
-	movq	%r8,64(%rcx)
-	adcq	72(%rcx),%r9
-	movq	%r9,72(%rcx)
-	adcq	80(%rcx),%r10
-	movq	%r10,80(%rcx)
-	adcq	88(%rcx),%r11
-	movq	%r11,88(%rcx)
-	movq	96(%rcx),%r12
-	adcq	$0x0,%r12
-	movq	%r12,96(%rcx)
-	movq	104(%rcx),%r13
-	adcq	$0x0,%r13
-	movq	%r13,104(%rcx)
-
-	addq	$96,%rsp
-.cfi_adjust_cfa_offset	-96
-	popq	%rbp
-.cfi_adjust_cfa_offset	-8
-.cfi_same_value	rbp
-	popq	%rbx
-.cfi_adjust_cfa_offset	-8
-.cfi_same_value	rbx
-
-
-	popq	%r15
-.cfi_adjust_cfa_offset	-8
-	popq	%r14
-.cfi_adjust_cfa_offset	-8
-	popq	%r13
-.cfi_adjust_cfa_offset	-8
-	popq	%r12
-.cfi_adjust_cfa_offset	-8
-	.byte	0xf3,0xc3
-.cfi_endproc	
-#endif
diff --git a/src/kem/sike/sike-p434-sha256/asm/fp_generic.c b/src/kem/sike/sike-p434-sha256/asm/fp_generic.c
deleted file mode 100644
index 38e7645e..00000000
--- a/src/kem/sike/sike-p434-sha256/asm/fp_generic.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/********************************************************************************************
-* SIDH: an efficient supersingular isogeny cryptography library
-*
-* Abstract: portable modular arithmetic for P503
-*********************************************************************************************/
-
-#if defined(ARCH_GENERIC) || \
-    (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
-
-#include "../utils.h"
-#include "../fpx.h"
-
-// Global constants
-extern const struct params_t params;
-
-static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
-{ // Digit multiplication, digit * digit -> 2-digit result
-    crypto_word_t al, ah, bl, bh, temp;
-    crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
-    crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);
-    crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4);
-
-    al = a & mask_low;                              // Low part
-    ah = a >> (sizeof(crypto_word_t) * 4);          // High part
-    bl = b & mask_low;
-    bh = b >> (sizeof(crypto_word_t) * 4);
-
-    albl = al*bl;
-    albh = al*bh;
-    ahbl = ah*bl;
-    ahbh = ah*bh;
-    c[0] = albl & mask_low;                         // C00
-
-    res1 = albl >> (sizeof(crypto_word_t) * 4);
-    res2 = ahbl & mask_low;
-    res3 = albh & mask_low;
-    temp = res1 + res2 + res3;
-    carry = temp >> (sizeof(crypto_word_t) * 4);
-    c[0] ^= temp << (sizeof(crypto_word_t) * 4);    // C01
-
-    res1 = ahbl >> (sizeof(crypto_word_t) * 4);
-    res2 = albh >> (sizeof(crypto_word_t) * 4);
-    res3 = ahbh & mask_low;
-    temp = res1 + res2 + res3 + carry;
-    c[1] = temp & mask_low;                         // C10
-    carry = temp & mask_high;
-    c[1] ^= (ahbh & mask_high) + carry;             // C11
-}
-
-void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
-{ // Modular addition, c = a+b mod p434.
-  // Inputs: a, b in [0, 2*p434-1]
-  // Output: c in [0, 2*p434-1]
-    unsigned int i, carry = 0;
-    crypto_word_t mask;
-
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        ADDC(carry, a[i], b[i], carry, c[i]);
-    }
-
-    carry = 0;
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        SUBC(carry, c[i], params.prime_x2[i], carry, c[i]);
-    }
-    mask = 0 - (crypto_word_t)carry;
-
-    carry = 0;
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]);
-    }
-}
-
-void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
-{ // Modular subtraction, c = a-b mod p434.
-  // Inputs: a, b in [0, 2*p434-1]
-  // Output: c in [0, 2*p434-1]
-    unsigned int i, borrow = 0;
-    crypto_word_t mask;
-
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        SUBC(borrow, a[i], b[i], borrow, c[i]);
-    }
-    mask = 0 - (crypto_word_t)borrow;
-
-    borrow = 0;
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]);
-    }
-}
-
-void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
-{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
-    unsigned int i, j;
-    crypto_word_t t = 0, u = 0, v = 0, UV[2];
-    unsigned int carry = 0;
-
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        for (j = 0; j <= i; j++) {
-            MUL(a[j], b[i-j], UV+1, UV[0]);
-            ADDC(0, UV[0], v, carry, v);
-            ADDC(carry, UV[1], u, carry, u);
-            t += carry;
-        }
-        c[i] = v;
-        v = u;
-        u = t;
-        t = 0;
-    }
-
-    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
-        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
-            MUL(a[j], b[i-j], UV+1, UV[0]);
-            ADDC(0, UV[0], v, carry, v);
-            ADDC(carry, UV[1], u, carry, u);
-            t += carry;
-        }
-        c[i] = v;
-        v = u;
-        u = t;
-        t = 0;
-    }
-    c[2*NWORDS_FIELD-1] = v;
-}
-
-void sike_fprdc(const felm_t ma, felm_t mc)
-{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
-  // mc = ma*R^-1 mod p434x2, where R = 2^448.
-  // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
-  // ma is assumed to be in Montgomery representation.
-    unsigned int i, j, carry, count = ZERO_WORDS;
-    crypto_word_t UV[2], t = 0, u = 0, v = 0;
-
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        mc[i] = 0;
-    }
-
-    for (i = 0; i < NWORDS_FIELD; i++) {
-        for (j = 0; j < i; j++) {
-            if (j < (i-ZERO_WORDS+1)) {
-                MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
-                ADDC(0, UV[0], v, carry, v);
-                ADDC(carry, UV[1], u, carry, u);
-                t += carry;
-            }
-        }
-        ADDC(0, v, ma[i], carry, v);
-        ADDC(carry, u, 0, carry, u);
-        t += carry;
-        mc[i] = v;
-        v = u;
-        u = t;
-        t = 0;
-    }
-
-    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {
-        if (count > 0) {
-            count -= 1;
-        }
-        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
-            if (j < (NWORDS_FIELD-count)) {
-                MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
-                ADDC(0, UV[0], v, carry, v);
-                ADDC(carry, UV[1], u, carry, u);
-                t += carry;
-            }
-        }
-        ADDC(0, v, ma[i], carry, v);
-        ADDC(carry, u, 0, carry, u);
-        t += carry;
-        mc[i-NWORDS_FIELD] = v;
-        v = u;
-        u = t;
-        t = 0;
-    }
-    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);
-    mc[NWORDS_FIELD-1] = v;
-}
-
-#endif  // NO_ASM || (!X86_64 && !AARCH64)
diff --git a/src/kem/sike/sike-p434-sha256/fpx.c b/src/kem/sike/sike-p434-sha256/fpx.c
deleted file mode 100644
index 30233406..00000000
--- a/src/kem/sike/sike-p434-sha256/fpx.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/********************************************************************************************
-* SIDH: an efficient supersingular isogeny cryptography library
-*
-* Abstract: core functions over GF(p) and GF(p^2)
-*********************************************************************************************/
-#include <stddef.h>
-#include "utils.h"
-#include "fpx.h"
-
-extern const struct params_t params;
-
-// Multiprecision squaring, c = a^2 mod p.
-static void fpsqr_mont(const felm_t ma, felm_t mc)
-{
-    dfelm_t temp = {0};
-    sike_mpmul(ma, ma, temp);
-    sike_fprdc(temp, mc);
-}
-
-// Chain to compute a^(p-3)/4 using Montgomery arithmetic.
-static void fpinv_chain_mont(felm_t a)
-{
-    unsigned int i, j;
-    felm_t t[31], tt;
-
-    // Precomputed table
-    fpsqr_mont(a, tt);
-    sike_fpmul_mont(a, tt, t[0]);
-    for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]);
-
-    sike_fpcopy(a, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[5], tt, tt);
-    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[14], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[3], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[23], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[13], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[24], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[7], tt, tt);
-    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[12], tt, tt);
-    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[30], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[1], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[30], tt, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[21], tt, tt);
-    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[2], tt, tt);
-    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[19], tt, tt);
-    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[1], tt, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[24], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[26], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[16], tt, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[10], tt, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[6], tt, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[0], tt, tt);
-    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[20], tt, tt);
-    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[9], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[25], tt, tt);
-    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[30], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[26], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(a, tt, tt);
-    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[28], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[6], tt, tt);
-    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[10], tt, tt);
-    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
-    sike_fpmul_mont(t[22], tt, tt);
-    for (j = 0; j < 35; j++) {
-        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
-        sike_fpmul_mont(t[30], tt, tt);
-    }
-    sike_fpcopy(tt, a);
-}
-
-// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p.
-static void fpinv_mont(felm_t a)
-{
-    felm_t tt = {0};
-    sike_fpcopy(a, tt);
-    fpinv_chain_mont(tt);
-    fpsqr_mont(tt, tt);
-    fpsqr_mont(tt, tt);
-    sike_fpmul_mont(a, tt, a);
-}
-
-// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
-#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
-inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
-    uint8_t carry = 0;
-    for (size_t i = 0; i < nwords; i++) {
-        ADDC(carry, a[i], b[i], carry, c[i]);
-    }
-    return carry;
-}
-
-// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit.
-inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
-    uint32_t borrow = 0;
-    for (size_t i = 0; i < nwords; i++) {
-        SUBC(borrow, a[i], b[i], borrow, c[i]);
-    }
-    return borrow;
-}
-#endif
-
-// Multiprecision addition, c = a+b.
-inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
-{
-#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
-    mp_add(a, b, c, NWORDS_FIELD);
-#else
-    sike_mpadd_asm(a, b, c);
-#endif
-}
-
-// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
-// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
-inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
-#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
-    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));
-#else
-    return sike_mpsubx2_asm(a, b, c);
-#endif
-}
-
-// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
-// Inputs should be s.t. c > a and c > b
-inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
-#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
-    mp_sub(c, a, c, 2*NWORDS_FIELD);
-    mp_sub(c, b, c, 2*NWORDS_FIELD);
-#else
-    sike_mpdblsubx2_asm(a, b, c);
-#endif
-}
-
-// Copy a field element, c = a.
-void sike_fpcopy(const felm_t a, felm_t c) {
-    for (size_t i = 0; i < NWORDS_FIELD; i++) {
-        c[i] = a[i];
-    }
-}
-
-// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod prime, where R=2^768
-void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
-{
-    dfelm_t temp = {0};
-    sike_mpmul(ma, mb, temp);
-    sike_fprdc(temp, mc);
-}
-
-// Conversion from Montgomery representation to standard representation,
-// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
-void sike_from_mont(const felm_t ma, felm_t c)
-{
-    felm_t one = {0};
-    one[0] = 1;
-
-    sike_fpmul_mont(ma, one, c);
-    sike_fpcorrection(c);
-}
-
-// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
-// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
-// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
-void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
-    felm_t t1, t2, t3;
-
-    mp_addfast(a->c0, a->c1, t1);                      // t1 = a0+a1
-    sike_fpsub(a->c0, a->c1, t2);                      // t2 = a0-a1
-    mp_addfast(a->c0, a->c0, t3);                      // t3 = 2a0
-    sike_fpmul_mont(t1, t2, c->c0);                    // c0 = (a0+a1)(a0-a1)
-    sike_fpmul_mont(t3, a->c1, c->c1);                 // c1 = 2a0*a1
-}
-
-// Modular negation, a = -a mod p503.
-// Input/output: a in [0, 2*p503-1]
-void sike_fpneg(felm_t a) {
-  uint32_t borrow = 0;
-  for (size_t i = 0; i < NWORDS_FIELD; i++) {
-    SUBC(borrow, params.prime_x2[i], a[i], borrow, a[i]);
-  }
-}
-
-// Modular division by two, c = a/2 mod p503.
-// Input : a in [0, 2*p503-1]
-// Output: c in [0, 2*p503-1]
-void sike_fpdiv2(const felm_t a, felm_t c) {
-  uint32_t carry = 0;
-  crypto_word_t mask;
-
-  mask = 0 - (crypto_word_t)(a[0] & 1);    // If a is odd compute a+p503
-  for (size_t i = 0; i < NWORDS_FIELD; i++) {
-    ADDC(carry, a[i], params.prime[i] & mask, carry, c[i]);
-  }
-
-  // Multiprecision right shift by one.
-  for (size_t i = 0; i < NWORDS_FIELD-1; i++) {
-    c[i] = (c[i] >> 1) ^ (c[i+1] << (RADIX - 1));
-  }
-  c[NWORDS_FIELD-1] >>= 1;
-}
-
-// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1].
-void sike_fpcorrection(felm_t a) {
-  uint32_t borrow = 0;
-  crypto_word_t mask;
-
-  for (size_t i = 0; i < NWORDS_FIELD; i++) {
-    SUBC(borrow, a[i], params.prime[i], borrow, a[i]);
-  }
-  mask = 0 - (crypto_word_t)borrow;
-
-  borrow = 0;
-  for (size_t i = 0; i < NWORDS_FIELD; i++) {
-    ADDC(borrow, a[i], params.prime[i] & mask, borrow, a[i]);
-  }
-}
-
-// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2).
-// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
-// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
-void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
-    felm_t t1, t2;
-    dfelm_t tt1, tt2, tt3;
-    crypto_word_t mask;
-
-    mp_addfast(a->c0, a->c1, t1);                      // t1 = a0+a1
-    mp_addfast(b->c0, b->c1, t2);                      // t2 = b0+b1
-    sike_mpmul(a->c0, b->c0, tt1);                     // tt1 = a0*b0
-    sike_mpmul(a->c1, b->c1, tt2);                     // tt2 = a1*b1
-    sike_mpmul(t1, t2, tt3);                           // tt3 = (a0+a1)*(b0+b1)
-    mp_dblsubfast(tt1, tt2, tt3);                      // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
-    mask = mp_subfast(tt1, tt2, tt1);                  // tt1 = a0*b0 - a1*b1. If tt1 < 0 then mask = 0xFF..F, else if tt1 >= 0 then mask = 0x00..0
-
-    for (size_t i = 0; i < NWORDS_FIELD; i++) {
-        t1[i] = params.prime[i] & mask;
-    }
-
-    sike_fprdc(tt3, c->c1);                             // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
-    mp_addfast(&tt1[NWORDS_FIELD], t1, &tt1[NWORDS_FIELD]);
-    sike_fprdc(tt1, c->c0);                             // c[0] = a0*b0 - a1*b1
-}
-
-// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2).
-void sike_fp2inv_mont(f2elm_t a) {
-    f2elm_t t1;
-
-    fpsqr_mont(a->c0, t1->c0);                         // t10 = a0^2
-    fpsqr_mont(a->c1, t1->c1);                         // t11 = a1^2
-    sike_fpadd(t1->c0, t1->c1, t1->c0);                // t10 = a0^2+a1^2
-    fpinv_mont(t1->c0);                                // t10 = (a0^2+a1^2)^-1
-    sike_fpneg(a->c1);                                 // a = a0-i*a1
-    sike_fpmul_mont(a->c0, t1->c0, a->c0);
-    sike_fpmul_mont(a->c1, t1->c0, a->c1);             // a = (a0-i*a1)*(a0^2+a1^2)^-1
-}
diff --git a/src/kem/sike/sike-p434-sha256/fpx.h b/src/kem/sike/sike-p434-sha256/fpx.h
deleted file mode 100644
index b9255ac7..00000000
--- a/src/kem/sike/sike-p434-sha256/fpx.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef FPX_H_
-#define FPX_H_
-
-#include "utils.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-// Modular addition, c = a+b mod p.
-void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
-// Modular subtraction, c = a-b mod p.
-void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
-// Modular division by two, c = a/2 mod p.
-void sike_fpdiv2(const felm_t a, felm_t c);
-// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
-void sike_fpcorrection(felm_t a);
-// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
-void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
-// 443-bit Montgomery reduction, c = a mod p
-void sike_fprdc(const dfelm_t a, felm_t c);
-// Double 2x443-bit multiprecision subtraction, c = c-a-b
-void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c);
-// Multiprecision subtraction, c = a-b
-crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
-// 443-bit multiprecision addition, c = a+b
-void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
-// Modular negation, a = -a mod p.
-void sike_fpneg(felm_t a);
-// Copy of a field element, c = a
-void sike_fpcopy(const felm_t a, felm_t c);
-// Copy a field element, c = a.
-void sike_fpzero(felm_t a);
-// If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time.
-void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
-// Conversion from Montgomery representation to standard representation,
-// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
-void sike_from_mont(const felm_t ma, felm_t c);
-// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p443, where R=2^768
-void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
-// GF(p443^2) multiplication using Montgomery arithmetic, c = a*b in GF(p443^2)
-void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
-// GF(p443^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
-void sike_fp2inv_mont(f2elm_t a);
-// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
-void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
-// Modular correction, a = a in GF(p^2).
-void sike_fp2correction(f2elm_t a);
-
-#if defined(__cplusplus)
-}  // extern C
-#endif
-
-// GF(p^2) addition, c = a+b in GF(p^2).
-#define sike_fp2add(a, b, c)             \
-do {                                     \
-    sike_fpadd(a->c0, b->c0, c->c0);     \
-    sike_fpadd(a->c1, b->c1, c->c1);     \
-} while(0)
-
-// GF(p^2) subtraction, c = a-b in GF(p^2).
-#define sike_fp2sub(a,b,c)               \
-do {                                     \
-    sike_fpsub(a->c0, b->c0, c->c0);     \
-    sike_fpsub(a->c1, b->c1, c->c1);     \
-} while(0)
-
-// Copy a GF(p^2) element, c = a.
-#define sike_fp2copy(a, c)               \
-do {                                     \
-    sike_fpcopy(a->c0, c->c0);           \
-    sike_fpcopy(a->c1, c->c1);           \
-} while(0)
-
-// GF(p^2) negation, a = -a in GF(p^2).
-#define sike_fp2neg(a)                   \
-do {                                     \
-    sike_fpneg(a->c0);                   \
-    sike_fpneg(a->c1);                   \
-} while(0)
-
-// GF(p^2) division by two, c = a/2  in GF(p^2).
-#define sike_fp2div2(a, c)               \
-do {                                     \
-    sike_fpdiv2(a->c0, c->c0);           \
-    sike_fpdiv2(a->c1, c->c1);           \
-} while(0)
-
-// Modular correction, a = a in GF(p^2).
-#define sike_fp2correction(a)            \
-do {                                     \
-    sike_fpcorrection(a->c0);            \
-    sike_fpcorrection(a->c1);            \
-} while(0)
-
-// Conversion of a GF(p^2) element to Montgomery representation,
-// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2).
-#define sike_to_fp2mont(a, mc)           \
-do {                                     \
-    sike_fpmul_mont(a->c0, params.mont_R2, mc->c0);   \
-    sike_fpmul_mont(a->c1, params.mont_R2, mc->c1);   \
-} while(0)
-
-// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
-// c_i = ma_i*R^(-1) = a_i in GF(p^2).
-#define sike_from_fp2mont(ma, c)         \
-do {                                     \
-    sike_from_mont(ma->c0, c->c0);       \
-    sike_from_mont(ma->c1, c->c1);       \
-} while(0)
-
-#endif // FPX_H_
diff --git a/src/kem/sike/sike-p434-sha256/isogeny.c b/src/kem/sike/sike-p434-sha256/isogeny.c
deleted file mode 100644
index 661410e4..00000000
--- a/src/kem/sike/sike-p434-sha256/isogeny.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/********************************************************************************************
-* SIDH: an efficient supersingular isogeny cryptography library
-*
-* Abstract: elliptic curve and isogeny functions
-*********************************************************************************************/
-#include <stddef.h>
-#include <string.h>
-#include "utils.h"
-#include "isogeny.h"
-#include "fpx.h"
-
-static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
-{ // Doubling of a Montgomery point in projective coordinates (X:Z).
-  // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C.
-  // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
-    f2elm_t t0, t1;
-
-    sike_fp2sub(P->X, P->Z, t0);                         // t0 = X1-Z1
-    sike_fp2add(P->X, P->Z, t1);                         // t1 = X1+Z1
-    sike_fp2sqr_mont(t0, t0);                            // t0 = (X1-Z1)^2
-    sike_fp2sqr_mont(t1, t1);                            // t1 = (X1+Z1)^2
-    sike_fp2mul_mont(C24, t0, Q->Z);                     // Z2 = C24*(X1-Z1)^2
-    sike_fp2mul_mont(t1, Q->Z, Q->X);                    // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
-    sike_fp2sub(t1, t0, t1);                             // t1 = (X1+Z1)^2-(X1-Z1)^2
-    sike_fp2mul_mont(A24plus, t1, t0);                   // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
-    sike_fp2add(Q->Z, t0, Q->Z);                         // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
-    sike_fp2mul_mont(Q->Z, t1, Q->Z);                    // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
-}
-
-void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
-{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
-  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C.
-  // Output: projective Montgomery x-coordinates Q <- (2^e)*P.
-
-    memmove(Q, P, sizeof(*P));
-    for (size_t i = 0; i < e; i++) {
-        xDBL(Q, Q, A24plus, C24);
-    }
-}
-
-void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
-{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
-  // Input:  projective point of order four P = (X4:Z4).
-  // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients
-  //         that are used to evaluate the isogeny at a point in eval_4_isog().
-
-    sike_fp2sub(P->X, P->Z, coeff[1]);                   // coeff[1] = X4-Z4
-    sike_fp2add(P->X, P->Z, coeff[2]);                   // coeff[2] = X4+Z4
-    sike_fp2sqr_mont(P->Z, coeff[0]);                    // coeff[0] = Z4^2
-    sike_fp2add(coeff[0], coeff[0], coeff[0]);           // coeff[0] = 2*Z4^2
-    sike_fp2sqr_mont(coeff[0], C24);                     // C24 = 4*Z4^4
-    sike_fp2add(coeff[0], coeff[0], coeff[0]);           // coeff[0] = 4*Z4^2
-    sike_fp2sqr_mont(P->X, A24plus);                     // A24plus = X4^2
-    sike_fp2add(A24plus, A24plus, A24plus);              // A24plus = 2*X4^2
-    sike_fp2sqr_mont(A24plus, A24plus);                  // A24plus = 4*X4^4
-}
-
-void eval_4_isog(point_proj_t P, f2elm_t* coeff)
-{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
-  // by the 3 coefficients in coeff (computed in the function get_4_isog()).
-  // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
-  // Output: the projective point P = phi(P) = (X:Z) in the codomain.
-    f2elm_t t0, t1;
-
-    sike_fp2add(P->X, P->Z, t0);                         // t0 = X+Z
-    sike_fp2sub(P->X, P->Z, t1);                         // t1 = X-Z
-    sike_fp2mul_mont(t0, coeff[1], P->X);                // X = (X+Z)*coeff[1]
-    sike_fp2mul_mont(t1, coeff[2], P->Z);                // Z = (X-Z)*coeff[2]
-    sike_fp2mul_mont(t0, t1, t0);                        // t0 = (X+Z)*(X-Z)
-    sike_fp2mul_mont(t0, coeff[0], t0);                  // t0 = coeff[0]*(X+Z)*(X-Z)
-    sike_fp2add(P->X, P->Z, t1);                         // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
-    sike_fp2sub(P->X, P->Z, P->Z);                       // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1]
-    sike_fp2sqr_mont(t1, t1);                            // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
-    sike_fp2sqr_mont(P->Z, P->Z);                        // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2
-    sike_fp2add(t1, t0, P->X);                           // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
-    sike_fp2sub(P->Z, t0, t0);                           // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z)
-    sike_fp2mul_mont(P->X, t1, P->X);                    // Xfinal
-    sike_fp2mul_mont(P->Z, t0, P->Z);                    // Zfinal
-}
-
-
-void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
-{ // Tripling of a Montgomery point in projective coordinates (X:Z).
-  // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
-  // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
-    f2elm_t t0, t1, t2, t3, t4, t5, t6;
-
-    sike_fp2sub(P->X, P->Z, t0);                         // t0 = X-Z
-    sike_fp2sqr_mont(t0, t2);                            // t2 = (X-Z)^2
-    sike_fp2add(P->X, P->Z, t1);                         // t1 = X+Z
-    sike_fp2sqr_mont(t1, t3);                            // t3 = (X+Z)^2
-    sike_fp2add(t0, t1, t4);                             // t4 = 2*X
-    sike_fp2sub(t1, t0, t0);                             // t0 = 2*Z
-    sike_fp2sqr_mont(t4, t1);                            // t1 = 4*X^2
-    sike_fp2sub(t1, t3, t1);                             // t1 = 4*X^2 - (X+Z)^2
-    sike_fp2sub(t1, t2, t1);                             // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
-    sike_fp2mul_mont(t3, A24plus, t5);                   // t5 = A24plus*(X+Z)^2
-    sike_fp2mul_mont(t3, t5, t3);                        // t3 = A24plus*(X+Z)^3
-    sike_fp2mul_mont(A24minus, t2, t6);                  // t6 = A24minus*(X-Z)^2
-    sike_fp2mul_mont(t2, t6, t2);                        // t2 = A24minus*(X-Z)^3
-    sike_fp2sub(t2, t3, t3);                             // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3
-    sike_fp2sub(t5, t6, t2);                             // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
-    sike_fp2mul_mont(t1, t2, t1);                        // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
-    sike_fp2add(t3, t1, t2);                             // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3
-    sike_fp2sqr_mont(t2, t2);                            // t2 = t2^2
-    sike_fp2mul_mont(t4, t2, Q->X);                      // X3 = 2*X*t2
-    sike_fp2sub(t3, t1, t1);                             // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
-    sike_fp2sqr_mont(t1, t1);                            // t1 = t1^2
-    sike_fp2mul_mont(t0, t1, Q->Z);                      // Z3 = 2*Z*t1
-}
-
-void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
-{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
-  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
-  // Output: projective Montgomery x-coordinates Q <- (3^e)*P.
-    memmove(Q, P, sizeof(*P));
-    for (size_t i = 0; i < e; i++) {
-        xTPL(Q, Q, A24minus, A24plus);
-    }
-}
-
-void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
-{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
-  // Input:  projective point of order three P = (X3:Z3).
-  // Output: the 3-isogenous Montgomery curve with projective coefficient A/C.
-    f2elm_t t0, t1, t2, t3, t4;
-
-    sike_fp2sub(P->X, P->Z, coeff[0]);                   // coeff0 = X-Z
-    sike_fp2sqr_mont(coeff[0], t0);                      // t0 = (X-Z)^2
-    sike_fp2add(P->X, P->Z, coeff[1]);                   // coeff1 = X+Z
-    sike_fp2sqr_mont(coeff[1], t1);                      // t1 = (X+Z)^2
-    sike_fp2add(t0, t1, t2);                             // t2 = (X+Z)^2 + (X-Z)^2
-    sike_fp2add(coeff[0], coeff[1], t3);                 // t3 = 2*X
-    sike_fp2sqr_mont(t3, t3);                            // t3 = 4*X^2
-    sike_fp2sub(t3, t2, t3);                             // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
-    sike_fp2add(t1, t3, t2);                             // t2 = 4*X^2 - (X-Z)^2
-    sike_fp2add(t3, t0, t3);                             // t3 = 4*X^2 - (X+Z)^2
-    sike_fp2add(t0, t3, t4);                             // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
-    sike_fp2add(t4, t4, t4);                             // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
-    sike_fp2add(t1, t4, t4);                             // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
-    sike_fp2mul_mont(t2, t4, A24minus);                  // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
-    sike_fp2add(t1, t2, t4);                             // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
-    sike_fp2add(t4, t4, t4);                             // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
-    sike_fp2add(t0, t4, t4);                             // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
-    sike_fp2mul_mont(t3, t4, t4);                        // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
-    sike_fp2sub(t4, A24minus, t0);                       // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
-    sike_fp2add(A24minus, t0, A24plus);                  // A24plus = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
-}
-
-
-void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
-{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and
-  // a point P with 2 coefficients in coeff (computed in the function get_3_isog()).
-  // Inputs: projective points P = (X3:Z3) and Q = (X:Z).
-  // Output: the projective point Q <- phi(Q) = (X3:Z3).
-    f2elm_t t0, t1, t2;
-
-    sike_fp2add(Q->X, Q->Z, t0);                       // t0 = X+Z
-    sike_fp2sub(Q->X, Q->Z, t1);                       // t1 = X-Z
-    sike_fp2mul_mont(t0, coeff[0], t0);                // t0 = coeff0*(X+Z)
-    sike_fp2mul_mont(t1, coeff[1], t1);                // t1 = coeff1*(X-Z)
-    sike_fp2add(t0, t1, t2);                           // t2 = coeff0*(X+Z) + coeff1*(X-Z)
-    sike_fp2sub(t1, t0, t0);                           // t0 = coeff1*(X-Z) - coeff0*(X+Z)
-    sike_fp2sqr_mont(t2, t2);                          // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2
-    sike_fp2sqr_mont(t0, t0);                          // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2
-    sike_fp2mul_mont(Q->X, t2, Q->X);                  // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
-    sike_fp2mul_mont(Q->Z, t0, Q->Z);                  // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
-}
-
-
-void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
-{ // 3-way simultaneous inversion
-  // Input:  z1,z2,z3
-  // Output: 1/z1,1/z2,1/z3 (override inputs).
-    f2elm_t t0, t1, t2, t3;
-
-    sike_fp2mul_mont(z1, z2, t0);                      // t0 = z1*z2
-    sike_fp2mul_mont(z3, t0, t1);                      // t1 = z1*z2*z3
-    sike_fp2inv_mont(t1);                              // t1 = 1/(z1*z2*z3)
-    sike_fp2mul_mont(z3, t1, t2);                      // t2 = 1/(z1*z2)
-    sike_fp2mul_mont(t2, z2, t3);                      // t3 = 1/z1
-    sike_fp2mul_mont(t2, z1, z2);                      // z2 = 1/z2
-    sike_fp2mul_mont(t0, t1, z3);                      // z3 = 1/z3
-    sike_fp2copy(t3, z1);                              // z1 = 1/z1
-}
-
-
-void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
-{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
-  // Input:  the x-coordinates xP, xQ, and xR of the points P, Q and R.
-  // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
-    f2elm_t t0, t1, one = F2ELM_INIT;
-
-    extern const struct params_t params;
-    sike_fpcopy(params.mont_one, one->c0);
-    sike_fp2add(xP, xQ, t1);                           // t1 = xP+xQ
-    sike_fp2mul_mont(xP, xQ, t0);                      // t0 = xP*xQ
-    sike_fp2mul_mont(xR, t1, A);                       // A = xR*t1
-    sike_fp2add(t0, A, A);                             // A = A+t0
-    sike_fp2mul_mont(t0, xR, t0);                      // t0 = t0*xR
-    sike_fp2sub(A, one, A);                            // A = A-1
-    sike_fp2add(t0, t0, t0);                           // t0 = t0+t0
-    sike_fp2add(t1, xR, t1);                           // t1 = t1+xR
-    sike_fp2add(t0, t0, t0);                           // t0 = t0+t0
-    sike_fp2sqr_mont(A, A);                            // A = A^2
-    sike_fp2inv_mont(t0);                              // t0 = 1/t0
-    sike_fp2mul_mont(A, t0, A);                        // A = A*t0
-    sike_fp2sub(A, t1, A);                             // Afinal = A-t1
-}
-
-
-void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
-{ // Computes the j-invariant of a Montgomery curve with projective constant.
-  // Input: A,C in GF(p^2).
-  // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
-    f2elm_t t0, t1;
-
-    sike_fp2sqr_mont(A, jinv);                           // jinv = A^2
-    sike_fp2sqr_mont(C, t1);                             // t1 = C^2
-    sike_fp2add(t1, t1, t0);                             // t0 = t1+t1
-    sike_fp2sub(jinv, t0, t0);                           // t0 = jinv-t0
-    sike_fp2sub(t0, t1, t0);                             // t0 = t0-t1
-    sike_fp2sub(t0, t1, jinv);                           // jinv = t0-t1
-    sike_fp2sqr_mont(t1, t1);                            // t1 = t1^2
-    sike_fp2mul_mont(jinv, t1, jinv);                    // jinv = jinv*t1
-    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0
-    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0
-    sike_fp2sqr_mont(t0, t1);                            // t1 = t0^2
-    sike_fp2mul_mont(t0, t1, t0);                        // t0 = t0*t1
-    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0
-    sike_fp2add(t0, t0, t0);                             // t0 = t0+t0
-    sike_fp2inv_mont(jinv);                              // jinv = 1/jinv
-    sike_fp2mul_mont(jinv, t0, jinv);                    // jinv = t0*jinv
-}
-
-
-void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
-{ // Simultaneous doubling and differential addition.
-  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
-  // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
-    f2elm_t t0, t1, t2;
-
-    sike_fp2add(P->X, P->Z, t0);                         // t0 = XP+ZP
-    sike_fp2sub(P->X, P->Z, t1);                         // t1 = XP-ZP
-    sike_fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2
-    sike_fp2sub(Q->X, Q->Z, t2);                         // t2 = XQ-ZQ
-    sike_fp2correction(t2);
-    sike_fp2add(Q->X, Q->Z, Q->X);                       // XQ = XQ+ZQ
-    sike_fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)
-    sike_fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2
-    sike_fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)
-    sike_fp2sub(P->X, P->Z, t2);                         // t2 = (XP+ZP)^2-(XP-ZP)^2
-    sike_fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2
-    sike_fp2mul_mont(t2, A24, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
-    sike_fp2sub(t0, t1, Q->Z);                           // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
-    sike_fp2add(Q->X, P->Z, P->Z);                       // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
-    sike_fp2add(t0, t1, Q->X);                           // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
-    sike_fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
-    sike_fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
-    sike_fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
-    sike_fp2mul_mont(Q->Z, xPQ, Q->Z);                   // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
-}
diff --git a/src/kem/sike/sike-p434-sha256/isogeny.h b/src/kem/sike/sike-p434-sha256/isogeny.h
deleted file mode 100644
index 460c8c66..00000000
--- a/src/kem/sike/sike-p434-sha256/isogeny.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef ISOGENY_H_
-#define ISOGENY_H_
-
-// Computes [2^e](X:Z) on Montgomery curve with projective
-// constant via e repeated doublings.
-void xDBLe(
-    const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
-    const f2elm_t C24, size_t e);
-// Simultaneous doubling and differential addition.
-void xDBLADD(
-    point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
-    const f2elm_t A24);
-// Tripling of a Montgomery point in projective coordinates (X:Z).
-void xTPL(
-    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
-    const f2elm_t A24plus);
-// Computes [3^e](X:Z) on Montgomery curve with projective constant
-// via e repeated triplings.
-void xTPLe(
-    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
-    const f2elm_t A24plus, size_t e);
-// Given the x-coordinates of P, Q, and R, returns the value A
-// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
-void get_A(
-    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
-// Computes the j-invariant of a Montgomery curve with projective constant.
-void j_inv(
-    const f2elm_t A, const f2elm_t C, f2elm_t jinv);
-// Computes the corresponding 4-isogeny of a projective Montgomery
-// point (X4:Z4) of order 4.
-void get_4_isog(
-    const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
-// Computes the corresponding 3-isogeny of a projective Montgomery
-// point (X3:Z3) of order 3.
-void get_3_isog(
-    const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
-    f2elm_t* coeff);
-// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3)
-// of order 3 on a Montgomery curve and a point P with coefficients given in coeff.
-void eval_3_isog(
-    point_proj_t Q, f2elm_t* coeff);
-// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
-void eval_4_isog(
-    point_proj_t P, f2elm_t* coeff);
-// 3-way simultaneous inversion
-void inv_3_way(
-    f2elm_t z1, f2elm_t z2, f2elm_t z3);
-
-#endif // ISOGENY_H_
diff --git a/src/kem/sike/sike-p434-sha256/params.c b/src/kem/sike/sike-p434-sha256/params.c
deleted file mode 100644
index b13f4c87..00000000
--- a/src/kem/sike/sike-p434-sha256/params.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/********************************************************************************************
-* SIDH: an efficient supersingular isogeny cryptography library
-*
-* Abstract: supersingular isogeny parameters and generation of functions for P434
-*********************************************************************************************/
-
-#include "utils.h"
-
-// Parameters for isogeny system "SIKE"
-const struct params_t params = {
-    .prime = {
-        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
-        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF),
-        U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
-        U64_TO_WORDS(0x0002341F27177344)
-    },
-    .prime_p1 = {
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000),
-        U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
-        U64_TO_WORDS(0x0002341F27177344)
-    },
-    .prime_x2 = {
-        U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
-        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF),
-        U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC),
-        U64_TO_WORDS(0x0004683E4E2EE688)
-    },
-    .A_gen = {
-        U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B),
-        U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1),
-        U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F),
-        U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0
-        U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B),
-        U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA),
-        U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C),
-        U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1
-        U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6),
-        U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03),
-        U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215),
-        U64_TO_WORDS(0x0001C4CB77542876), // XQA0
-        U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050),
-        U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374),
-        U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508),
-        U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1
-        U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611),
-        U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD),
-        U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3),
-        U64_TO_WORDS(0x00022A81D8D55643), // XRA0
-        U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED),
-        U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4),
-        U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2),
-        U64_TO_WORDS(0x0000B87FC716C0C6)  // XRA1
-    },
-    .B_gen = {
-        U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05),
-        U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F),
-        U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9),
-        U64_TO_WORDS(0x0001BED4772E551F), // XPB0
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), // XPB1
-        U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C),
-        U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62),
-        U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F),
-        U64_TO_WORDS(0x000034080181D8AE), // XQB0
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), // XQB1
-        U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647),
-        U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D),
-        U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504),
-        U64_TO_WORDS(0x00007E8A50F02E37), // XRB0
-        U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230),
-        U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53),
-        U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B),
-        U64_TO_WORDS(0x000173FA910377D3)  // XRB1
-    },
-    .mont_R2 = {
-        U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2),
-        U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B),
-        U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A),
-        U64_TO_WORDS(0x000025A89BCDD12A)
-    },
-    .mont_one = {
-        U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000),
-        U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C),
-        U64_TO_WORDS(0x0000ECEEA7BD2EDA)
-    },
-    .mont_six = {
-        U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000),
-        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000),
-        U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0),
-        U64_TO_WORDS(0x00012559A0403298)
-    },
-    .A_strat = {
-        0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
-        0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04,
-        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01,
-        0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02,
-        0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04,
-        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01,
-        0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
-        0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01,
-        0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03,
-        0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04,
-        0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01
-    },
-    .B_strat = {
-        0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01,
-        0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01,
-        0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02,
-        0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10,
-        0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
-        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01,
-        0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
-        0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01,
-        0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
-        0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02,
-        0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01,
-        0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
-        0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
-        0x02, 0x01, 0x01, 0x02, 0x01, 0x01
-    }
-};
diff --git a/src/kem/sike/sike-p434-sha256/sike.c b/src/kem/sike/sike-p434-sha256/sike.c
deleted file mode 100644
index f00ebe76..00000000
--- a/src/kem/sike/sike-p434-sha256/sike.c
+++ /dev/null
@@ -1,517 +0,0 @@
-/********************************************************************************************
-* SIDH: an efficient supersingular isogeny cryptography library
-*
-* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
-*********************************************************************************************/
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include <sha2/sha256.h>
-#include <random/randombytes.h>
-
-#include "utils.h"
-#include "isogeny.h"
-#include "fpx.h"
-
-extern const struct params_t params;
-
-// SIDH_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
-#define SIDH_JINV_BYTESZ    110U
-// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
-#define SIDH_PRV_A_BITSZ    216U
-// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (3-isogeny)
-#define SIDH_PRV_B_BITSZ    217U
-// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
-#define MAX_INT_POINTS_ALICE    7U
-// MAX_INT_POINTS_ALICE is a number of points used in 3-isogeny tree computation
-#define MAX_INT_POINTS_BOB      8U
-
-// Swap points.
-// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
-#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
-static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
-{
-    crypto_word_t temp;
-    for (size_t i = 0; i < NWORDS_FIELD; i++) {
-        temp = option & (P->X->c0[i] ^ Q->X->c0[i]);
-        P->X->c0[i] = temp ^ P->X->c0[i];
-        Q->X->c0[i] = temp ^ Q->X->c0[i];
-        temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]);
-        P->Z->c0[i] = temp ^ P->Z->c0[i];
-        Q->Z->c0[i] = temp ^ Q->Z->c0[i];
-        temp = option & (P->X->c1[i] ^ Q->X->c1[i]);
-        P->X->c1[i] = temp ^ P->X->c1[i];
-        Q->X->c1[i] = temp ^ Q->X->c1[i];
-        temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]);
-        P->Z->c1[i] = temp ^ P->Z->c1[i];
-        Q->Z->c1[i] = temp ^ Q->Z->c1[i];
-    }
-}
-#endif
-
-// Swap points.
-// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
-static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
-{
-#if defined(ARCH_X86_64) && !defined(ARCH_GENERIC)
-    sike_cswap_asm(P, Q, option);
-#else
-    sike_cswap(P, Q, option);
-#endif
-}
-
-static void ladder3Pt(
-    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
-    int is_A, point_proj_t R, const f2elm_t A) {
-    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
-    f2elm_t A24 = F2ELM_INIT;
-    crypto_word_t mask;
-    int bit, swap, prevbit = 0;
-
-    const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;
-
-    // Initializing constant
-    sike_fpcopy(params.mont_one, A24[0].c0);
-    sike_fp2add(A24, A24, A24);
-    sike_fp2add(A, A24, A24);
-    sike_fp2div2(A24, A24);
-    sike_fp2div2(A24, A24); // A24 = (A+2)/4
-
-    // Initializing points
-    sike_fp2copy(xQ, R0->X);
-    sike_fpcopy(params.mont_one, R0->Z[0].c0);
-    sike_fp2copy(xPQ, R2->X);
-    sike_fpcopy(params.mont_one, R2->Z[0].c0);
-    sike_fp2copy(xP, R->X);
-    sike_fpcopy(params.mont_one, R->Z[0].c0);
-    memset(R->Z->c1, 0, sizeof(R->Z->c1));
-
-    // Main loop
-    for (size_t i = 0; i < nbits; i++) {
-        bit = (m[i >> 3] >> (i & 7)) & 1;
-        swap = bit ^ prevbit;
-        prevbit = bit;
-        mask = 0 - (crypto_word_t)swap;
-
-        sike_fp2cswap(R, R2, mask);
-        xDBLADD(R0, R2, R->X, A24);
-        sike_fp2mul_mont(R2->X, R->Z, R2->X);
-    }
-    swap = 0 ^ prevbit;
-    mask = 0 - (crypto_word_t)swap;
-    sike_fp2cswap(R, R2, mask);
-}
-
-// Initialization of basis points
-static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
-    sike_fpcopy(gen,                  XP->c0);
-    sike_fpcopy(gen +   NWORDS_FIELD, XP->c1);
-    sike_fpcopy(gen + 2*NWORDS_FIELD, XQ->c0);
-    sike_fpcopy(gen + 3*NWORDS_FIELD, XQ->c1);
-    sike_fpcopy(gen + 4*NWORDS_FIELD, XR->c0);
-    sike_fpcopy(gen + 5*NWORDS_FIELD, XR->c1);
-}
-
-// Conversion of GF(p^2) element from Montgomery to standard representation.
-static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
-    f2elm_t t;
-    sike_from_fp2mont(x, t);
-
-    // convert to bytes in little endian form
-    for (size_t i=0; i<FIELD_BYTESZ; i++) {
-        enc[i+           0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
-        enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
-    }
-}
-
-// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation.
-// Elements over GF(p503) are encoded in 63 octets in little endian format
-// (i.e., the least significant octet is located in the lowest memory address).
-static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
-    memset(t[0].c0, 0, sizeof(t[0].c0));
-    memset(t[0].c1, 0, sizeof(t[0].c1));
-    // convert bytes in little endian form to f2elm_t
-    for (size_t i = 0; i < FIELD_BYTESZ; i++) {
-        t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+           0]) << (8*(i%LSZ));
-        t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (8*(i%LSZ));
-    }
-    sike_to_fp2mont(t, t);
-}
-
-// Alice's ephemeral public key generation
-// Input:  a private key prA in the range [0, 2^250 - 1], stored in 32 bytes.
-// Output: the public key pkA consisting of 3 GF(p503^2) elements encoded in 378 bytes.
-static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
-{
-    point_proj_t R, pts[MAX_INT_POINTS_ALICE];
-    point_proj_t phiP = POINT_PROJ_INIT;
-    point_proj_t phiQ = POINT_PROJ_INIT;
-    point_proj_t phiR = POINT_PROJ_INIT;
-    f2elm_t XPA, XQA, XRA, coeff[3];
-    f2elm_t A24plus = F2ELM_INIT;
-    f2elm_t C24 = F2ELM_INIT;
-    f2elm_t A = F2ELM_INIT;
-    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
-
-    // Initialize basis points
-    sike_init_basis(params.A_gen, XPA, XQA, XRA);
-    sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X);
-    sike_fpcopy(params.mont_one, (phiP->Z)->c0);
-    sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
-    sike_fpcopy(params.mont_one, (phiR->Z)->c0);
-
-    // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
-    sike_fpcopy(params.mont_one, A24plus->c0);
-    sike_fp2add(A24plus, A24plus, A24plus);
-    sike_fp2add(A24plus, A24plus, C24);
-    sike_fp2add(A24plus, C24, A);
-    sike_fp2add(C24, C24, A24plus);
-
-    // Retrieve kernel point
-    ladder3Pt(XPA, XQA, XRA, skA, 1, R, A);
-
-    // Traverse tree
-    index = 0;
-    for (size_t row = 1; row < A_max; row++) {
-        while (index < A_max-row) {
-            sike_fp2copy(R->X, pts[npts]->X);
-            sike_fp2copy(R->Z, pts[npts]->Z);
-            pts_index[npts++] = index;
-            m = params.A_strat[ii++];
-            xDBLe(R, R, A24plus, C24, (2*m));
-            index += m;
-        }
-        get_4_isog(R, A24plus, C24, coeff);
-
-        for (size_t i = 0; i < npts; i++) {
-            eval_4_isog(pts[i], coeff);
-        }
-        eval_4_isog(phiP, coeff);
-        eval_4_isog(phiQ, coeff);
-        eval_4_isog(phiR, coeff);
-
-        sike_fp2copy(pts[npts-1]->X, R->X);
-        sike_fp2copy(pts[npts-1]->Z, R->Z);
-        index = pts_index[npts-1];
-        npts -= 1;
-    }
-
-    get_4_isog(R, A24plus, C24, coeff);
-    eval_4_isog(phiP, coeff);
-    eval_4_isog(phiQ, coeff);
-    eval_4_isog(phiR, coeff);
-
-    inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
-    sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
-    sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
-    sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
-
-    // Format public key
-    sike_fp2_encode(phiP->X, pkA);
-    sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ);
-    sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ);
-}
-
-// Bob's ephemeral key-pair generation
-// It produces a private key skB and computes the public key pkB.
-// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
-// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes.
-static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
-{
-    point_proj_t R, pts[MAX_INT_POINTS_BOB];
-    point_proj_t phiP = POINT_PROJ_INIT;
-    point_proj_t phiQ = POINT_PROJ_INIT;
-    point_proj_t phiR = POINT_PROJ_INIT;
-    f2elm_t XPB, XQB, XRB, coeff[3];
-    f2elm_t A24plus = F2ELM_INIT;
-    f2elm_t A24minus = F2ELM_INIT;
-    f2elm_t A = F2ELM_INIT;
-    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
-
-    // Initialize basis points
-    sike_init_basis(params.B_gen, XPB, XQB, XRB);
-    sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X);
-    sike_fpcopy(params.mont_one, (phiP->Z)->c0);
-    sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
-    sike_fpcopy(params.mont_one, (phiR->Z)->c0);
-
-    // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
-    sike_fpcopy(params.mont_one, A24plus->c0);
-    sike_fp2add(A24plus, A24plus, A24plus);
-    sike_fp2add(A24plus, A24plus, A24minus);
-    sike_fp2add(A24plus, A24minus, A);
-    sike_fp2add(A24minus, A24minus, A24plus);
-
-    // Retrieve kernel point
-    ladder3Pt(XPB, XQB, XRB, skB, 0, R, A);
-
-    // Traverse tree
-    index = 0;
-    for (size_t row = 1; row < B_max; row++) {
-        while (index < B_max-row) {
-            sike_fp2copy(R->X, pts[npts]->X);
-            sike_fp2copy(R->Z, pts[npts]->Z);
-            pts_index[npts++] = index;
-            m = params.B_strat[ii++];
-            xTPLe(R, R, A24minus, A24plus, m);
-            index += m;
-        }
-        get_3_isog(R, A24minus, A24plus, coeff);
-
-        for (size_t i = 0; i < npts; i++) {
-            eval_3_isog(pts[i], coeff);
-        }
-        eval_3_isog(phiP, coeff);
-        eval_3_isog(phiQ, coeff);
-        eval_3_isog(phiR, coeff);
-
-        sike_fp2copy(pts[npts-1]->X, R->X);
-        sike_fp2copy(pts[npts-1]->Z, R->Z);
-        index = pts_index[npts-1];
-        npts -= 1;
-    }
-
-    get_3_isog(R, A24minus, A24plus, coeff);
-    eval_3_isog(phiP, coeff);
-    eval_3_isog(phiQ, coeff);
-    eval_3_isog(phiR, coeff);
-
-    inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
-    sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
-    sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
-    sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
-
-    // Format public key
-    sike_fp2_encode(phiP->X, pkB);
-    sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ);
-    sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ);
-}
-
-// Alice's ephemeral shared secret computation
-// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
-// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes.
-//         Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes.
-// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes.
-static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
-{
-    point_proj_t R, pts[MAX_INT_POINTS_ALICE];
-    f2elm_t coeff[3], PKB[3], jinv;
-    f2elm_t A24plus = F2ELM_INIT;
-    f2elm_t C24 = F2ELM_INIT;
-    f2elm_t A = F2ELM_INIT;
-    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
-
-    // Initialize images of Bob's basis
-    fp2_decode(pkB, PKB[0]);
-    fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]);
-    fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]);
-
-    // Initialize constants
-    get_A(PKB[0], PKB[1], PKB[2], A);
-    sike_fpadd(params.mont_one, params.mont_one, C24->c0);
-    sike_fp2add(A, C24, A24plus);
-    sike_fpadd(C24->c0, C24->c0, C24->c0);
-
-    // Retrieve kernel point
-    ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A);
-
-    // Traverse tree
-    index = 0;
-    for (size_t row = 1; row < A_max; row++) {
-        while (index < A_max-row) {
-            sike_fp2copy(R->X, pts[npts]->X);
-            sike_fp2copy(R->Z, pts[npts]->Z);
-            pts_index[npts++] = index;
-            m = params.A_strat[ii++];
-            xDBLe(R, R, A24plus, C24, (2*m));
-            index += m;
-        }
-        get_4_isog(R, A24plus, C24, coeff);
-
-        for (size_t i = 0; i < npts; i++) {
-            eval_4_isog(pts[i], coeff);
-        }
-
-        sike_fp2copy(pts[npts-1]->X, R->X);
-        sike_fp2copy(pts[npts-1]->Z, R->Z);
-        index = pts_index[npts-1];
-        npts -= 1;
-    }
-
-    get_4_isog(R, A24plus, C24, coeff);
-    sike_fp2add(A24plus, A24plus, A24plus);
-    sike_fp2sub(A24plus, C24, A24plus);
-    sike_fp2add(A24plus, A24plus, A24plus);
-    j_inv(A24plus, C24, jinv);
-    sike_fp2_encode(jinv, ssA);
-}
-
-// Bob's ephemeral shared secret computation
-// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
-// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes.
-//         Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes.
-// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes.
-static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
-{
-    point_proj_t R, pts[MAX_INT_POINTS_BOB];
-    f2elm_t coeff[3], PKB[3], jinv;
-    f2elm_t A24plus = F2ELM_INIT;
-    f2elm_t A24minus = F2ELM_INIT;
-    f2elm_t A = F2ELM_INIT;
-    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
-
-    // Initialize images of Alice's basis
-    fp2_decode(pkA, PKB[0]);
-    fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]);
-    fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]);
-
-    // Initialize constants
-    get_A(PKB[0], PKB[1], PKB[2], A);
-    sike_fpadd(params.mont_one, params.mont_one, A24minus->c0);
-    sike_fp2add(A, A24minus, A24plus);
-    sike_fp2sub(A, A24minus, A24minus);
-
-    // Retrieve kernel point
-    ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A);
-
-    // Traverse tree
-    index = 0;
-    for (size_t row = 1; row < B_max; row++) {
-        while (index < B_max-row) {
-            sike_fp2copy(R->X, pts[npts]->X);
-            sike_fp2copy(R->Z, pts[npts]->Z);
-            pts_index[npts++] = index;
-            m = params.B_strat[ii++];
-            xTPLe(R, R, A24minus, A24plus, m);
-            index += m;
-        }
-        get_3_isog(R, A24minus, A24plus, coeff);
-
-        for (size_t i = 0; i < npts; i++) {
-            eval_3_isog(pts[i], coeff);
-        }
-
-        sike_fp2copy(pts[npts-1]->X, R->X);
-        sike_fp2copy(pts[npts-1]->Z, R->Z);
-        index = pts_index[npts-1];
-        npts -= 1;
-    }
-
-    get_3_isog(R, A24minus, A24plus, coeff);
-    sike_fp2add(A24plus, A24minus, A);
-    sike_fp2add(A, A, A);
-    sike_fp2sub(A24plus, A24minus, A24plus);
-    j_inv(A, A24plus, jinv);
-    sike_fp2_encode(jinv, ssB);
-}
-
-int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
-                 uint8_t out_pub[SIKE_PUB_BYTESZ]) {
-  // Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and <
-  // 253 bits
-  randombytes(out_priv, SIKE_PRV_BYTESZ);
-  out_priv[31] = (out_priv[31] | 0x01) & 0x03;
-
-  gen_iso_B(out_priv, out_pub);
-  return 1;
-}
-
-void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
-                 uint8_t out_ciphertext[SIKE_CT_BYTESZ],
-                 const uint8_t pub_key[SIKE_PUB_BYTESZ]) {
-  // Secret buffer is reused by the function to store some ephemeral
-  // secret data. It's size must be maximum of 64,
-  // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
-  uint8_t secret[32]; // OZAPTF, why?
-  uint8_t j[SIDH_JINV_BYTESZ];
-  uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ];
-  SHA256_CTX ctx;
-
-  // Generate secret key for A
-  // secret key A = SHA256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ
-  randombytes(temp, SIKE_MSG_BYTESZ);
-
-  sha256_init(&ctx);
-  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
-  sha256_update(&ctx, pub_key, SIKE_PUB_BYTESZ);
-  sha256_final(&ctx, secret);
-
-  // Generate public key for A - first part of the ciphertext
-  gen_iso_A(secret, out_ciphertext);
-
-  // Generate c1:
-  //  h = SHA256(j-invariant)
-  // c1 = h ^ m
-  ex_iso_A(secret, pub_key, j);
-  sha256_init(&ctx);
-  sha256_update(&ctx, j, sizeof(j));
-  sha256_final(&ctx, secret);
-
-  // c1 = h ^ m
-  uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ];
-  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
-    c1[i] = temp[i] ^ secret[i];
-  }
-
-  sha256_init(&ctx);
-  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
-  sha256_update(&ctx, out_ciphertext, SIKE_CT_BYTESZ);
-  sha256_final(&ctx, secret);
-  // Generate shared secret out_shared_key = SHA256(m||out_ciphertext)
-  memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
-}
-
-void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
-                 const uint8_t ciphertext[SIKE_CT_BYTESZ],
-                 const uint8_t pub_key[SIKE_PUB_BYTESZ],
-                 const uint8_t priv_key[SIKE_PRV_BYTESZ]) {
-  // Secret buffer is reused by the function to store some ephemeral
-  // secret data. It's size must be maximum of 64,
-  // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
-  uint8_t secret[32];
-  uint8_t j[SIDH_JINV_BYTESZ];
-  uint8_t c0[SIKE_PUB_BYTESZ];
-  uint8_t temp[SIKE_MSG_BYTESZ];
-  uint8_t shared_nok[SIKE_MSG_BYTESZ];
-  SHA256_CTX ctx;
-
-  // This is OK as we are only using ephemeral keys in BoringSSL
-  randombytes(shared_nok, SIKE_MSG_BYTESZ);
-
-  // Recover m
-  // Let ciphertext = c0 || c1 - both have fixed sizes
-  // m = F(j-invariant(c0, priv_key)) ^ c1
-  ex_iso_B(priv_key, ciphertext, j);
-
-  sha256_init(&ctx);
-  sha256_update(&ctx, j, sizeof(j));
-  sha256_final(&ctx, secret);
-
-  const uint8_t *c1 = &ciphertext[sizeof(c0)];
-  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
-    temp[i] = c1[i] ^ secret[i];
-  }
-
-  sha256_init(&ctx);
-  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
-  sha256_update(&ctx, pub_key, SIKE_PUB_BYTESZ);
-  sha256_final(&ctx, secret);
-
-  // Recover c0 = public key A
-  gen_iso_A(secret, c0);
-  crypto_word_t ok = ct_uint_eq(
-    ct_mem_eq(c0, ciphertext, SIKE_PUB_BYTESZ), 1);
-  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
-    temp[i] = ct_select_8(ok, temp[i], shared_nok[i]);
-  }
-
-  sha256_init(&ctx);
-  sha256_update(&ctx, temp, SIKE_MSG_BYTESZ);
-  sha256_update(&ctx, ciphertext, SIKE_CT_BYTESZ);
-  sha256_final(&ctx, secret);
-
-  // Generate shared secret out_shared_key = SHA256(m||ciphertext)
-  memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
-}
diff --git a/src/kem/sike/sike-p434-sha256/utils.h b/src/kem/sike/sike-p434-sha256/utils.h
deleted file mode 100644
index 87623d33..00000000
--- a/src/kem/sike/sike-p434-sha256/utils.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/********************************************************************************************
-* SIDH: an efficient supersingular isogeny cryptography library
-*
-* Abstract: internal header file for P434
-*********************************************************************************************/
-
-#ifndef UTILS_H_
-#define UTILS_H_
-
-#include <stddef.h>
-#include <sike/sike.h>
-
-// Conversion macro from number of bits to number of bytes
-#define BITS_TO_BYTES(nbits)      (((nbits)+7)/8)
-
-// Bit size of the field
-#define BITS_FIELD              434
-// Byte size of the field
-#define FIELD_BYTESZ            BITS_TO_BYTES(BITS_FIELD)
-// Number of 64-bit words of a 224-bit element
-#define NBITS_ORDER             224
-#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)
-// Number of elements in Alice's strategy
-#define A_max                   108
-// Number of elements in Bob's strategy
-#define B_max                   137
-// Word size size
-#define RADIX                   sizeof(crypto_word_t)*8
-// Byte size of a limb
-#define LSZ                     sizeof(crypto_word_t)
-
-#if defined(CPU_64_BIT)
-    typedef uint64_t crypto_word_t;
-    // Number of words of a 434-bit field element
-    #define NWORDS_FIELD    7
-    // Number of "0" digits in the least significant part of p434 + 1
-    #define ZERO_WORDS 3
-    // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
-    #define U64_TO_WORDS(x) UINT64_C(x)
-#else
-    typedef uint32_t crypto_word_t;
-    // Number of words of a 434-bit field element
-    #define NWORDS_FIELD    14
-    // Number of "0" digits in the least significant part of p434 + 1
-    #define ZERO_WORDS 6
-    // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
-    #define U64_TO_WORDS(x) \
-        (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32)
-#endif
-
-// Extended datatype support
-#if !defined(HAS_UINT128)
-    typedef uint64_t uint128_t[2];
-#endif
-
-// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
-// Digit multiplication
-#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));
-
-// If mask |x|==0xff.ff set |x| to 1, otherwise 0
-#define M2B(x) ((x)>>(RADIX-1))
-
-// Digit addition with carry
-#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                   \
-do {                                                                        \
-  crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn);             \
-  (sumOut) = (addend2) + tempReg;                                           \
-  (carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) |  \
-                   ct_uint_lt((sumOut), tempReg));                  \
-} while(0)
-
-// Digit subtraction with borrow
-#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)           \
-do {                                                                            \
-    crypto_word_t tempReg = (minuend) - (subtrahend);                           \
-    crypto_word_t borrowReg = M2B(ct_uint_lt((minuend), (subtrahend))); \
-    borrowReg |= ((borrowIn) & ct_uint_eq(tempReg, 0));               \
-    (differenceOut) = tempReg - (crypto_word_t)(borrowIn);                      \
-    (borrowOut) = borrowReg;                                                    \
-} while(0)
-
-/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
-   which violates C11 as described in 6.7.9, 21 (similarily C99, 6.7.8).
-   Defines below are used to work around the bug, and provide a way
-   to initialize f2elem_t and point_proj_t structs.
-   Bug has been fixed in GCC6 (debian stretch).
-*/
-#define F2ELM_INIT {{ {0}, {0} }}
-#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}
-
-// Datatype for representing 434-bit field elements (448-bit max.)
-// Elements over GF(p434) are encoded in 63 octets in little endian format
-// (i.e., the least significant octet is located in the lowest memory address).
-typedef crypto_word_t felm_t[NWORDS_FIELD];
-
-// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e.
-// Fp2 element = c0 + c1*i in F_{p^2}
-// Datatype for representing double-precision 2x434-bit field elements (448-bit max.)
-// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are
-// encoded as {a, b}, with a in the lowest memory portion.
-typedef struct {
-    felm_t c0;
-    felm_t c1;
-} fp2;
-
-// Our F_{p^2} element type is a pointer to the struct.
-typedef fp2 f2elm_t[1];
-
-// Datatype for representing double-precision 2x434-bit
-// field elements in contiguous memory.
-typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];
-
-// Constants used during SIKE computation.
-struct params_t {
-    // Stores a prime
-    const crypto_word_t prime[NWORDS_FIELD];
-    // Stores prime + 1
-    const crypto_word_t prime_p1[NWORDS_FIELD];
-    // Stores prime * 2
-    const crypto_word_t prime_x2[NWORDS_FIELD];
-    // Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i}
-    // in GF(prime^2), expressed in Montgomery representation
-    const crypto_word_t A_gen[6*NWORDS_FIELD];
-    // Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i}
-    // in GF(prime^2), expressed in Montgomery representation
-    const crypto_word_t B_gen[6*NWORDS_FIELD];
-    // Montgomery constant mont_R2 = (2^448)^2 mod prime
-    const crypto_word_t mont_R2[NWORDS_FIELD];
-    // Value 'one' in Montgomery representation
-    const crypto_word_t mont_one[NWORDS_FIELD];
-    // Value '6' in Montgomery representation
-    const crypto_word_t mont_six[NWORDS_FIELD];
-    // Fixed parameters for isogeny tree computation
-    const unsigned int A_strat[A_max-1];
-    const unsigned int B_strat[B_max-1];
-};
-
-// Point representation in projective XZ Montgomery coordinates.
-typedef struct {
-    f2elm_t X;
-    f2elm_t Z;
-} point_proj;
-typedef point_proj point_proj_t[1];
-
-// Checks whether two words are equal. Returns 1 in case it is,
-// otherwise 0.
-static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
-{
-    // if x==y then t = 0
-    crypto_word_t t = x ^ y;
-    // if x!=y t will have first bit set
-    t = (t >> 1) - t;
-    // return MSB - 1 in case x==y, otherwise 0
-    return ((~t) >> (RADIX-1));
-}
-// Constant time select.
-// if pick == 1 (out = in1)
-// if pick == 0 (out = in2)
-// else out is undefined
-static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
-    uint8_t mask = ((int8_t)(flag << 7))>>7;
-    return (in1&mask) | (in2&(~mask));
-}
-
-// Constant time memcmp. Returns 1 if p==q, otherwise 0
-static inline int ct_mem_eq(const void *p, const void *q, size_t n)
-{
-  const uint8_t *pp = (uint8_t*)p, *qq = (uint8_t*)q;
-  uint8_t a = 0;
-
-  while (n--) a |= *pp++ ^ *qq++;
-  return (ct_uint_eq(a, 0));
-}
-
-/*
-// Returns 1 if x<y, otherwise 0
-static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y) {
-  const crypto_word_t t1 = x^y;
-  const crypto_word_t t2 = x - y;
-  const crypto_word_t tt = x ^ (t1 | (t2^y));
-  return (tt >> (RADIX-1));
-}
-*/
-
-/// OZAPTF: coppied from boringssl
-static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
-  return 0u - (a >> (sizeof(a) * 8 - 1));
-}
-
-// constant_time_lt_w returns 0xff..f if a < b and 0 otherwise.
-static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
-{
-  /*
-  const crypto_word_t t1 = x^y;
-  const crypto_word_t t2 = x - y;
-  const crypto_word_t tt = x ^ (t1 | (t2^y));
-  return (tt >> (RADIX-1));
-  */
-  // Consider the two cases of the problem:
-  //   msb(a) == msb(b): a < b iff the MSB of a - b is set.
-  //   msb(a) != msb(b): a < b iff the MSB of b is set.
-  //
-  // If msb(a) == msb(b) then the following evaluates as:
-  //   msb(a^((a^b)|((a-b)^a))) ==
-  //   msb(a^((a-b) ^ a))       ==   (because msb(a^b) == 0)
-  //   msb(a^a^(a-b))           ==   (rearranging)
-  //   msb(a-b)                      (because ∀x. x^x == 0)
-  //
-  // Else, if msb(a) != msb(b) then the following evaluates as:
-  //   msb(a^((a^b)|((a-b)^a))) ==
-  //   msb(a^(𝟙 | ((a-b)^a)))   ==   (because msb(a^b) == 1 and 𝟙
-  //                                  represents a value s.t. msb(𝟙) = 1)
-  //   msb(a^𝟙)                 ==   (because ORing with 1 results in 1)
-  //   msb(b)
-  //
-  //
-  // Here is an SMT-LIB verification of this formula:
-  //
-  // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
-  //   (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
-  // )
-  //
-  // (declare-fun a () (_ BitVec 32))
-  // (declare-fun b () (_ BitVec 32))
-  //
-  // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
-  // (check-sat)
-  // (get-model)
-  return constant_time_msb_w(x^((x^y)|((x-y)^x)));
-}
-#endif // UTILS_H_
diff --git a/src/rustapi/pqc-sys/src/bindings.rs b/src/rustapi/pqc-sys/src/bindings.rs
index 3bcde42d..de93cfad 100644
--- a/src/rustapi/pqc-sys/src/bindings.rs
+++ b/src/rustapi/pqc-sys/src/bindings.rs
@@ -256,7 +256,8 @@ pub const SABER: ::std::os::raw::c_uint = 15;
 pub const HQCRMRS128: ::std::os::raw::c_uint = 16;
 pub const HQCRMRS192: ::std::os::raw::c_uint = 17;
 pub const HQCRMRS256: ::std::os::raw::c_uint = 18;
-pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 19;
+pub const SIKE434: ::std::os::raw::c_uint = 19;
+pub const PQC_ALG_KEM_MAX: ::std::os::raw::c_uint = 20;
 pub type _bindgen_ty_2 = ::std::os::raw::c_uint;
 #[repr(C)]
 #[derive(Debug, Copy, Clone)]
diff --git a/test/katrunner/src/main.rs b/test/katrunner/src/main.rs
index 6e6dad92..f6ee752c 100644
--- a/test/katrunner/src/main.rs
+++ b/test/katrunner/src/main.rs
@@ -212,6 +212,7 @@ const KATS: &'static[Register] = &[
     REG_KEM!(HQCRMRS128, "round3/hqc/hqc-128/hqc-128_kat.rsp"),
     REG_KEM!(HQCRMRS192, "round3/hqc/hqc-192/hqc-192_kat.rsp"),
     REG_KEM!(HQCRMRS256, "round3/hqc/hqc-256/hqc-256_kat.rsp"),
+    REG_KEM!(SIKE434, "round3/sike/PQCkemKAT_374.rsp"),
 
     // Those are Round2. KATs are very big, so skip testing until it makes sense to do so.
     //REG_SIGN!(RAINBOWVCLASSIC),
-- 
2.39.5


From b2f9d52be5ccf5983561e48ce9ad3a6ae7e2f532 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Fri, 9 Apr 2021 00:44:47 +0100
Subject: [PATCH 03/12] update sike

---
 src/kem/sike/p434/fp-x86_64.S  | 1080 ++++++++++++++++++++++++++++++++
 src/kem/sike/p434/fp_generic.c |  173 +++++
 src/kem/sike/p434/fpx.c        |  282 +++++++++
 src/kem/sike/p434/fpx.h        |  112 ++++
 src/kem/sike/p434/isogeny.c    |  262 ++++++++
 src/kem/sike/p434/isogeny.h    |   49 ++
 src/kem/sike/p434/params.c     |  128 ++++
 src/kem/sike/p434/sike.c       |  522 +++++++++++++++
 src/kem/sike/p434/utils.h      |  214 +++++++
 9 files changed, 2822 insertions(+)
 create mode 100644 src/kem/sike/p434/fp-x86_64.S
 create mode 100644 src/kem/sike/p434/fp_generic.c
 create mode 100644 src/kem/sike/p434/fpx.c
 create mode 100644 src/kem/sike/p434/fpx.h
 create mode 100644 src/kem/sike/p434/isogeny.c
 create mode 100644 src/kem/sike/p434/isogeny.h
 create mode 100644 src/kem/sike/p434/params.c
 create mode 100644 src/kem/sike/p434/sike.c
 create mode 100644 src/kem/sike/p434/utils.h

diff --git a/src/kem/sike/p434/fp-x86_64.S b/src/kem/sike/p434/fp-x86_64.S
new file mode 100644
index 00000000..f2f32392
--- /dev/null
+++ b/src/kem/sike/p434/fp-x86_64.S
@@ -0,0 +1,1080 @@
+.text
+
+.Lp434x2:	# 2*p434 as 7 limbs packed into 6 quads: limbs 1 and 2 are equal and stored once
+.quad	0xFFFFFFFFFFFFFFFE	# limb 0 (offset 0)
+.quad	0xFFFFFFFFFFFFFFFF	# limbs 1 and 2 (offset 8, applied to two consecutive limbs by the users)
+.quad	0xFB82ECF5C5FFFFFF	# limb 3 (offset 16)
+.quad	0xF78CB8F062B15D47	# limb 4 (offset 24)
+.quad	0xD9F8BFAD038A40AC	# limb 5 (offset 32)
+.quad	0x0004683E4E2EE688	# limb 6 (offset 40)
+
+
+.Lp434p1:	# p434 + 1 with its three all-zero low limbs omitted: only limbs 3..6 are stored
+.quad	0xFDC1767AE3000000	# limb 3 (offset 0)
+.quad	0x7BC65C783158AEA3	# limb 4 (offset 8)
+.quad	0x6CFC5FD681C52056	# limb 5 (offset 16)
+.quad	0x0002341F27177344	# limb 6 (offset 24)
+
+.globl	sike_fpadd	# c = a + b - 2*p434, plus 2*p434 again if that went negative. rdi = a, rsi = b, rdx = c (7 limbs each)
+.hidden sike_fpadd
+.type	sike_fpadd,@function
+sike_fpadd:
+.cfi_startproc
+	pushq	%r12	# r12-r14 carry limbs 4-6 below, so the caller values are preserved
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+
+	xorq	%rax,%rax	# rax = 0, later turned into the borrow mask
+
+	movq	0(%rdi),%r8	# r8..r14 = a + b, one add/adc carry chain over 7 limbs
+	addq	0(%rsi),%r8
+	movq	8(%rdi),%r9
+	adcq	8(%rsi),%r9
+	movq	16(%rdi),%r10
+	adcq	16(%rsi),%r10
+	movq	24(%rdi),%r11
+	adcq	24(%rsi),%r11
+	movq	32(%rdi),%r12
+	adcq	32(%rsi),%r12
+	movq	40(%rdi),%r13
+	adcq	40(%rsi),%r13
+	movq	48(%rdi),%r14
+	adcq	48(%rsi),%r14
+
+	movq	.Lp434x2(%rip),%rcx	# r8..r14 -= 2*p434 (movq leaves the borrow flag intact between limbs)
+	subq	%rcx,%r8
+	movq	8+.Lp434x2(%rip),%rcx	# the quad at offset 8 serves limb 1 and limb 2
+	sbbq	%rcx,%r9
+	sbbq	%rcx,%r10
+	movq	16+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r11
+	movq	24+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r12
+	movq	32+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r13
+	movq	40+.Lp434x2(%rip),%rcx
+	sbbq	%rcx,%r14
+
+	sbbq	$0,%rax	# rax = all-ones if the subtraction borrowed, else 0
+
+	movq	.Lp434x2(%rip),%rdi	# rdi, rsi, rcx = limbs 0, 1/2, 3 of 2*p434 ANDed with the mask (a and b pointers are dead now)
+	andq	%rax,%rdi
+	movq	8+.Lp434x2(%rip),%rsi
+	andq	%rax,%rsi
+	movq	16+.Lp434x2(%rip),%rcx
+	andq	%rax,%rcx
+
+	addq	%rdi,%r8	# add the masked 2*p434 back, storing each limb of c as it is finished
+	movq	%r8,0(%rdx)
+	adcq	%rsi,%r9
+	movq	%r9,8(%rdx)
+	adcq	%rsi,%r10
+	movq	%r10,16(%rdx)
+	adcq	%rcx,%r11
+	movq	%r11,24(%rdx)
+
+	setc	%cl	# park the carry in cl because the andq instructions below overwrite the flags
+	movq	24+.Lp434x2(%rip),%r8
+	andq	%rax,%r8
+	movq	32+.Lp434x2(%rip),%r9
+	andq	%rax,%r9
+	movq	40+.Lp434x2(%rip),%r10
+	andq	%rax,%r10
+	btq	$0,%rcx	# CF = bit 0 of rcx, i.e. the parked carry
+
+	adcq	%r8,%r12	# finish the masked add-back on limbs 4..6
+	movq	%r12,32(%rdx)
+	adcq	%r9,%r13
+	movq	%r13,40(%rdx)
+	adcq	%r10,%r14
+	movq	%r14,48(%rdx)
+
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
+.globl	sike_cswap_asm	# Branch-free conditional swap of two 224-byte buffers at rdi and rsi; rdx is the mask (0 keeps, all-ones swaps)
+.hidden sike_cswap_asm
+.type	sike_cswap_asm,@function
+sike_cswap_asm:
+
+
+	movq	%rdx,%xmm3	# low 64 bits of xmm3 = mask
+
+
+
+
+
+	pshufd	$68,%xmm3,%xmm3	# shuffle 0x44 copies the low quadword into the high one, so xmm3 = mask:mask
+
+	movdqu	0(%rdi),%xmm0	# bytes 0..15: xmm0 = x, xmm1 = y (unaligned loads, no alignment assumed)
+	movdqu	0(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2	# xmm2 = x ^ y
+	pand	%xmm3,%xmm2	# xmm2 = (x ^ y) & mask
+	pxor	%xmm2,%xmm0	# x ^= xmm2, which yields y when the mask is all-ones and x when it is 0
+	pxor	%xmm2,%xmm1	# y ^= xmm2, symmetric to the line above
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm1,0(%rsi)
+
+	movdqu	16(%rdi),%xmm0	# bytes 16..31, same sequence
+	movdqu	16(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,16(%rdi)
+	movdqu	%xmm1,16(%rsi)
+
+	movdqu	32(%rdi),%xmm0	# bytes 32..47
+	movdqu	32(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,32(%rdi)
+	movdqu	%xmm1,32(%rsi)
+
+	movdqu	48(%rdi),%xmm0	# bytes 48..63
+	movdqu	48(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,48(%rdi)
+	movdqu	%xmm1,48(%rsi)
+
+	movdqu	64(%rdi),%xmm0	# bytes 64..79
+	movdqu	64(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,64(%rdi)
+	movdqu	%xmm1,64(%rsi)
+
+	movdqu	80(%rdi),%xmm0	# bytes 80..95
+	movdqu	80(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,80(%rdi)
+	movdqu	%xmm1,80(%rsi)
+
+	movdqu	96(%rdi),%xmm0	# bytes 96..111
+	movdqu	96(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,96(%rdi)
+	movdqu	%xmm1,96(%rsi)
+
+	movdqu	112(%rdi),%xmm0	# bytes 112..127
+	movdqu	112(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,112(%rdi)
+	movdqu	%xmm1,112(%rsi)
+
+	movdqu	128(%rdi),%xmm0	# bytes 128..143
+	movdqu	128(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,128(%rdi)
+	movdqu	%xmm1,128(%rsi)
+
+	movdqu	144(%rdi),%xmm0	# bytes 144..159
+	movdqu	144(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,144(%rdi)
+	movdqu	%xmm1,144(%rsi)
+
+	movdqu	160(%rdi),%xmm0	# bytes 160..175
+	movdqu	160(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,160(%rdi)
+	movdqu	%xmm1,160(%rsi)
+
+	movdqu	176(%rdi),%xmm0	# bytes 176..191
+	movdqu	176(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,176(%rdi)
+	movdqu	%xmm1,176(%rsi)
+
+	movdqu	192(%rdi),%xmm0	# bytes 192..207
+	movdqu	192(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,192(%rdi)
+	movdqu	%xmm1,192(%rsi)
+
+	movdqu	208(%rdi),%xmm0	# bytes 208..223, the last of the 14 chunks
+	movdqu	208(%rsi),%xmm1
+	movdqa	%xmm1,%xmm2
+	pxor	%xmm0,%xmm2
+	pand	%xmm3,%xmm2
+	pxor	%xmm2,%xmm0
+	pxor	%xmm2,%xmm1
+	movdqu	%xmm0,208(%rdi)
+	movdqu	%xmm1,208(%rsi)
+
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.globl	sike_fpsub	# c = a - b, plus 2*p434 if the subtraction borrowed. rdi = a, rsi = b, rdx = c (7 limbs each)
+.hidden sike_fpsub
+.type	sike_fpsub,@function
+sike_fpsub:
+.cfi_startproc
+	pushq	%r12	# r12-r14 carry limbs 4-6 below, so the caller values are preserved
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+
+	xorq	%rax,%rax	# rax = 0, later turned into the borrow mask
+
+	movq	0(%rdi),%r8	# r8..r14 = a - b, one sub/sbb borrow chain over 7 limbs
+	subq	0(%rsi),%r8
+	movq	8(%rdi),%r9
+	sbbq	8(%rsi),%r9
+	movq	16(%rdi),%r10
+	sbbq	16(%rsi),%r10
+	movq	24(%rdi),%r11
+	sbbq	24(%rsi),%r11
+	movq	32(%rdi),%r12
+	sbbq	32(%rsi),%r12
+	movq	40(%rdi),%r13
+	sbbq	40(%rsi),%r13
+	movq	48(%rdi),%r14
+	sbbq	48(%rsi),%r14
+
+	sbbq	$0x0,%rax	# rax = all-ones if a < b (final borrow set), else 0
+
+	movq	.Lp434x2(%rip),%rdi	# rdi, rsi, rcx = limbs 0, 1/2, 3 of 2*p434 ANDed with the mask (a and b pointers are dead now)
+	andq	%rax,%rdi
+	movq	8+.Lp434x2(%rip),%rsi
+	andq	%rax,%rsi
+	movq	16+.Lp434x2(%rip),%rcx
+	andq	%rax,%rcx
+
+	addq	%rdi,%r8	# add the masked 2*p434, storing limbs 0..3 of c as they are finished
+	movq	%r8,0(%rdx)
+	adcq	%rsi,%r9
+	movq	%r9,8(%rdx)
+	adcq	%rsi,%r10	# rsi is reused because limbs 1 and 2 of 2*p434 are equal
+	movq	%r10,16(%rdx)
+	adcq	%rcx,%r11
+	movq	%r11,24(%rdx)
+
+	setc	%cl	# park the carry in cl because the andq instructions below overwrite the flags
+	movq	24+.Lp434x2(%rip),%r8
+	andq	%rax,%r8
+	movq	32+.Lp434x2(%rip),%r9
+	andq	%rax,%r9
+	movq	40+.Lp434x2(%rip),%r10
+	andq	%rax,%r10
+	btq	$0x0,%rcx	# CF = bit 0 of rcx, i.e. the parked carry
+
+	adcq	%r8,%r12	# finish the masked add on limbs 4..6
+	adcq	%r9,%r13
+	adcq	%r10,%r14
+	movq	%r12,32(%rdx)
+	movq	%r13,40(%rdx)
+	movq	%r14,48(%rdx)
+
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
+.globl	sike_mpadd_asm	# Plain 7-limb addition c = a + b with no modular correction. rdi = a, rsi = b, rdx = c
+.hidden sike_mpadd_asm
+.type	sike_mpadd_asm,@function
+sike_mpadd_asm:
+.cfi_startproc
+	movq	0(%rdi),%r8;	# limbs 0..4 of a into r8..r11 and rcx (only caller-saved registers are used)
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%rcx
+	addq	0(%rsi),%r8	# add limbs 0..4 of b along one carry chain
+	adcq	8(%rsi),%r9
+	adcq	16(%rsi),%r10
+	adcq	24(%rsi),%r11
+	adcq	32(%rsi),%rcx
+	movq	%r8,0(%rdx)	# the stores and loads below are movq, which keeps the carry flag alive
+	movq	%r9,8(%rdx)
+	movq	%r10,16(%rdx)
+	movq	%r11,24(%rdx)
+	movq	%rcx,32(%rdx)
+
+	movq	40(%rdi),%r8	# limbs 5 and 6, continuing the same carry chain
+	movq	48(%rdi),%r9
+	adcq	40(%rsi),%r8
+	adcq	48(%rsi),%r9	# the carry out of limb 6 is dropped: rax is never written
+	movq	%r8,40(%rdx)
+	movq	%r9,48(%rdx)
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
+.globl	sike_mpsubx2_asm	# 14-limb subtraction c = a - b. rdi = a, rsi = b, rdx = c. Returns rax = all-ones on borrow, else 0
+.hidden sike_mpsubx2_asm
+.type	sike_mpsubx2_asm,@function
+sike_mpsubx2_asm:
+.cfi_startproc
+	xorq	%rax,%rax	# rax = 0, becomes the returned borrow mask
+
+	movq	0(%rdi),%r8	# limbs 0..4
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%rcx
+	subq	0(%rsi),%r8
+	sbbq	8(%rsi),%r9
+	sbbq	16(%rsi),%r10
+	sbbq	24(%rsi),%r11
+	sbbq	32(%rsi),%rcx
+	movq	%r8,0(%rdx)	# movq does not touch the flags, so the borrow survives into the next group
+	movq	%r9,8(%rdx)
+	movq	%r10,16(%rdx)
+	movq	%r11,24(%rdx)
+	movq	%rcx,32(%rdx)
+
+	movq	40(%rdi),%r8	# limbs 5..9, continuing the borrow chain
+	movq	48(%rdi),%r9
+	movq	56(%rdi),%r10
+	movq	64(%rdi),%r11
+	movq	72(%rdi),%rcx
+	sbbq	40(%rsi),%r8
+	sbbq	48(%rsi),%r9
+	sbbq	56(%rsi),%r10
+	sbbq	64(%rsi),%r11
+	sbbq	72(%rsi),%rcx
+	movq	%r8,40(%rdx)
+	movq	%r9,48(%rdx)
+	movq	%r10,56(%rdx)
+	movq	%r11,64(%rdx)
+	movq	%rcx,72(%rdx)
+
+	movq	80(%rdi),%r8	# limbs 10..13
+	movq	88(%rdi),%r9
+	movq	96(%rdi),%r10
+	movq	104(%rdi),%r11
+	sbbq	80(%rsi),%r8
+	sbbq	88(%rsi),%r9
+	sbbq	96(%rsi),%r10
+	sbbq	104(%rsi),%r11
+	sbbq	$0x0,%rax	# rax = 0 - final borrow: all-ones when a < b, else 0
+	movq	%r8,80(%rdx)
+	movq	%r9,88(%rdx)
+	movq	%r10,96(%rdx)
+	movq	%r11,104(%rdx)
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
+.globl	sike_mpdblsubx2_asm	# In-place 14-limb double subtraction c = c - a - b. rdi = a, rsi = b, rdx = c. No borrow is returned
+.hidden sike_mpdblsubx2_asm
+.type	sike_mpdblsubx2_asm,@function
+sike_mpdblsubx2_asm:
+.cfi_startproc
+	pushq	%r12	# r12 and r13 hold limbs below, so the caller values are preserved
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+
+	xorq	%rax,%rax	# rax counts the borrows out of the low half (ends up 0, 1 or 2)
+
+
+	movq	0(%rdx),%r8	# low half: limbs 0..6 of c into r8..r13 and rcx
+	movq	8(%rdx),%r9
+	movq	16(%rdx),%r10
+	movq	24(%rdx),%r11
+	movq	32(%rdx),%r12
+	movq	40(%rdx),%r13
+	movq	48(%rdx),%rcx
+	subq	0(%rdi),%r8	# low half -= a[0..6]
+	sbbq	8(%rdi),%r9
+	sbbq	16(%rdi),%r10
+	sbbq	24(%rdi),%r11
+	sbbq	32(%rdi),%r12
+	sbbq	40(%rdi),%r13
+	sbbq	48(%rdi),%rcx
+	adcq	$0x0,%rax	# record the borrow of the first pass
+
+
+	subq	0(%rsi),%r8	# low half -= b[0..6]
+	sbbq	8(%rsi),%r9
+	sbbq	16(%rsi),%r10
+	sbbq	24(%rsi),%r11
+	sbbq	32(%rsi),%r12
+	sbbq	40(%rsi),%r13
+	sbbq	48(%rsi),%rcx
+	adcq	$0x0,%rax	# record the borrow of the second pass
+
+
+	movq	%r8,0(%rdx)	# write limbs 0..6 of the result back into c
+	movq	%r9,8(%rdx)
+	movq	%r10,16(%rdx)
+	movq	%r11,24(%rdx)
+	movq	%r12,32(%rdx)
+	movq	%r13,40(%rdx)
+	movq	%rcx,48(%rdx)
+
+
+	movq	56(%rdx),%r8	# high half: limbs 7..13 of c
+	movq	64(%rdx),%r9
+	movq	72(%rdx),%r10
+	movq	80(%rdx),%r11
+	movq	88(%rdx),%r12
+	movq	96(%rdx),%r13
+	movq	104(%rdx),%rcx
+
+	subq	%rax,%r8	# apply the accumulated low-half borrows to limb 7
+	sbbq	56(%rdi),%r8	# NOTE(review): a borrow from the subq above is consumed here on limb 7 again instead of on limb 8 - confirm the input precondition rules that case out
+	sbbq	64(%rdi),%r9	# high half -= a[7..13]
+	sbbq	72(%rdi),%r10
+	sbbq	80(%rdi),%r11
+	sbbq	88(%rdi),%r12
+	sbbq	96(%rdi),%r13
+	sbbq	104(%rdi),%rcx
+
+
+	subq	56(%rsi),%r8	# high half -= b[7..13], the final borrow is discarded
+	sbbq	64(%rsi),%r9
+	sbbq	72(%rsi),%r10
+	sbbq	80(%rsi),%r11
+	sbbq	88(%rsi),%r12
+	sbbq	96(%rsi),%r13
+	sbbq	104(%rsi),%rcx
+
+
+	movq	%r8,56(%rdx)	# write limbs 7..13 of the result back into c
+	movq	%r9,64(%rdx)
+	movq	%r10,72(%rdx)
+	movq	%r11,80(%rdx)
+	movq	%r12,88(%rdx)
+	movq	%r13,96(%rdx)
+	movq	%rcx,104(%rdx)
+
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
+
+.globl	sike_fprdc	# Montgomery-style reduction: rdi = 14-limb value, rsi = 7-limb result. The buffer at rdi is used as scratch and overwritten
+.hidden sike_fprdc
+.type	sike_fprdc,@function
+sike_fprdc:
+.cfi_startproc
+	pushq	%r12	# r12 and r13 are used as accumulators below
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14	# r14 and r15 are saved and restored but not otherwise referenced in this routine
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r15, -40
+
+	xorq	%rax,%rax	# rax = 0 and CF = OF = 0 before the first adox chain
+	movq	0+0(%rdi),%rdx	# Round 1, limb 0: rdx is the implicit mulx factor (mulx needs BMI2, adcx/adox need ADX - no feature check is made here)
+	mulxq	0+.Lp434p1(%rip),%r8,%r9	# AT&T mulx order: source, low half, high half
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9	# stitch the partial products into r8..r12 using the OF carry chain
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	xorq	%rax,%rax	# clear CF and OF again for the two interleaved chains
+	movq	0+8(%rdi),%rdx	# Round 1, limb 1: its product is accumulated one limb higher, into r9..r13
+	mulxq	0+.Lp434p1(%rip),%r13,%rcx
+	adcxq	%r13,%r9
+	adcxq	%rcx,%r10
+
+	mulxq	8+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r11
+	adoxq	%rcx,%r10
+
+	mulxq	16+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r12
+	adoxq	%rcx,%r11
+
+	mulxq	24+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%rax,%r13
+	adoxq	%rcx,%r12
+	adoxq	%rax,%r13
+
+	xorq	%rcx,%rcx	# rcx = 0, it absorbs limb 9 plus the carry of the sum below
+	addq	24(%rdi),%r8	# add the product into buffer limbs 3..9 (offset 24 because the table omits three zero limbs)
+	adcq	32(%rdi),%r9
+	adcq	40(%rdi),%r10
+	adcq	48(%rdi),%r11
+	adcq	56(%rdi),%r12
+	adcq	64(%rdi),%r13
+	adcq	72(%rdi),%rcx
+	movq	%r8,24(%rdi)	# the updated limbs are written back into the input buffer
+	movq	%r9,32(%rdi)
+	movq	%r10,40(%rdi)
+	movq	%r11,48(%rdi)
+	movq	%r12,56(%rdi)
+	movq	%r13,64(%rdi)
+	movq	%rcx,72(%rdi)
+	movq	80(%rdi),%r8	# ripple the remaining carry through limbs 10..13
+	movq	88(%rdi),%r9
+	movq	96(%rdi),%r10
+	movq	104(%rdi),%r11
+	adcq	$0x0,%r8
+	adcq	$0x0,%r9
+	adcq	$0x0,%r10
+	adcq	$0x0,%r11
+	movq	%r8,80(%rdi)
+	movq	%r9,88(%rdi)
+	movq	%r10,96(%rdi)
+	movq	%r11,104(%rdi)
+
+	xorq	%rax,%rax
+	movq	16+0(%rdi),%rdx	# Round 2: same pattern for buffer limbs 2 and 3 (limb 3 as updated by round 1)
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	xorq	%rax,%rax
+	movq	16+8(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r13,%rcx
+	adcxq	%r13,%r9
+	adcxq	%rcx,%r10
+
+	mulxq	8+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r11
+	adoxq	%rcx,%r10
+
+	mulxq	16+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r12
+	adoxq	%rcx,%r11
+
+	mulxq	24+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%rax,%r13
+	adoxq	%rcx,%r12
+	adoxq	%rax,%r13
+
+	xorq	%rcx,%rcx
+	addq	40(%rdi),%r8	# add the product into buffer limbs 5..11
+	adcq	48(%rdi),%r9
+	adcq	56(%rdi),%r10
+	adcq	64(%rdi),%r11
+	adcq	72(%rdi),%r12
+	adcq	80(%rdi),%r13
+	adcq	88(%rdi),%rcx
+	movq	%r8,40(%rdi)
+	movq	%r9,48(%rdi)
+	movq	%r10,56(%rdi)
+	movq	%r11,64(%rdi)
+	movq	%r12,72(%rdi)
+	movq	%r13,80(%rdi)
+	movq	%rcx,88(%rdi)
+	movq	96(%rdi),%r8	# ripple the remaining carry through limbs 12 and 13
+	movq	104(%rdi),%r9
+	adcq	$0x0,%r8
+	adcq	$0x0,%r9
+	movq	%r8,96(%rdi)
+	movq	%r9,104(%rdi)
+
+	xorq	%rax,%rax
+	movq	32+0(%rdi),%rdx	# Round 3: same pattern for buffer limbs 4 and 5
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	xorq	%rax,%rax
+	movq	32+8(%rdi),%rdx
+	mulxq	0+.Lp434p1(%rip),%r13,%rcx
+	adcxq	%r13,%r9
+	adcxq	%rcx,%r10
+
+	mulxq	8+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r11
+	adoxq	%rcx,%r10
+
+	mulxq	16+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%r13,%r12
+	adoxq	%rcx,%r11
+
+	mulxq	24+.Lp434p1(%rip),%rcx,%r13
+	adcxq	%rax,%r13
+	adoxq	%rcx,%r12
+	adoxq	%rax,%r13
+
+	xorq	%rcx,%rcx
+	addq	56(%rdi),%r8	# add the product into buffer limbs 7..13
+	adcq	64(%rdi),%r9
+	adcq	72(%rdi),%r10
+	adcq	80(%rdi),%r11
+	adcq	88(%rdi),%r12
+	adcq	96(%rdi),%r13
+	adcq	104(%rdi),%rcx
+	movq	%r8,0(%rsi)	# limbs 7 and 8 are final and become result limbs 0 and 1
+	movq	%r9,8(%rsi)
+	movq	%r10,72(%rdi)	# the other sums go back to buffer limbs 9..13 for the last round
+	movq	%r11,80(%rdi)
+	movq	%r12,88(%rdi)
+	movq	%r13,96(%rdi)
+	movq	%rcx,104(%rdi)
+
+	xorq	%rax,%rax
+	movq	48(%rdi),%rdx	# Round 4: buffer limb 6 alone
+	mulxq	0+.Lp434p1(%rip),%r8,%r9
+	mulxq	8+.Lp434p1(%rip),%r12,%r10
+	mulxq	16+.Lp434p1(%rip),%r13,%r11
+
+	adoxq	%r12,%r9
+	adoxq	%r13,%r10
+
+	mulxq	24+.Lp434p1(%rip),%r13,%r12
+	adoxq	%r13,%r11
+	adoxq	%rax,%r12
+
+	addq	72(%rdi),%r8	# add buffer limbs 9..13 and emit result limbs 2..6
+	adcq	80(%rdi),%r9
+	adcq	88(%rdi),%r10
+	adcq	96(%rdi),%r11
+	adcq	104(%rdi),%r12
+	movq	%r8,16(%rsi)
+	movq	%r9,24(%rsi)
+	movq	%r10,32(%rsi)
+	movq	%r11,40(%rsi)
+	movq	%r12,48(%rsi)
+
+
+	popq	%r15
+.cfi_adjust_cfa_offset	-8
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
+.globl	sike_mpmul	# c = a * b: rdi = a (7 limbs), rsi = b (7 limbs), rdx = c (14 limbs). One Karatsuba level, operands split as 4 low + 3 high limbs
+.hidden sike_mpmul
+.type	sike_mpmul,@function
+sike_mpmul:
+.cfi_startproc
+	pushq	%r12	# r12-r15, rbx and rbp are all used as scratch below and restored at the end
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r12, -16
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r13, -24
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r14, -32
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	r15, -40
+
+
+	movq	%rdx,%rcx	# rcx = c, because rdx is needed as the implicit mulx factor (mulx needs BMI2, adcx/adox need ADX)
+	xorq	%rax,%rax	# rax = 0, becomes the carry mask of AL + AH
+
+
+	movq	0(%rdi),%r8	# r8..r11 = AL = a[0..3]
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+	movq	24(%rdi),%r11
+
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+.cfi_offset	rbx, -48
+	pushq	%rbp
+.cfi_offset	rbp, -56
+.cfi_adjust_cfa_offset	8
+	subq	$96,%rsp	# 96 bytes of scratch: [0,32) AL+AH, [32,64) BL+BH, [64,96) masked cross terms
+.cfi_adjust_cfa_offset	96
+
+	addq	32(%rdi),%r8	# r8..r11 = AL + AH, where AH = a[4..6] (three limbs)
+	adcq	40(%rdi),%r9
+	adcq	48(%rdi),%r10
+	adcq	$0x0,%r11
+	sbbq	$0x0,%rax	# rax = all-ones if AL + AH carried out of 256 bits, else 0
+	movq	%r8,0(%rsp)
+	movq	%r9,8(%rsp)
+	movq	%r10,16(%rsp)
+	movq	%r11,24(%rsp)
+
+
+	xorq	%rbx,%rbx	# rbx = 0, becomes the carry mask of BL + BH
+	movq	0(%rsi),%r12	# r12..r15 = BL + BH, computed the same way from b
+	movq	8(%rsi),%r13
+	movq	16(%rsi),%r14
+	movq	24(%rsi),%r15
+	addq	32(%rsi),%r12
+	adcq	40(%rsi),%r13
+	adcq	48(%rsi),%r14
+	adcq	$0x0,%r15
+	sbbq	$0x0,%rbx
+	movq	%r12,32(%rsp)
+	movq	%r13,40(%rsp)
+	movq	%r14,48(%rsp)
+	movq	%r15,56(%rsp)
+
+
+	andq	%rax,%r12	# r12..r15 = BL + BH if the a-side sum carried, else 0
+	andq	%rax,%r13
+	andq	%rax,%r14
+	andq	%rax,%r15
+
+
+	andq	%rbx,%r8	# r8..r11 = AL + AH if the b-side sum carried, else 0
+	andq	%rbx,%r9
+	andq	%rbx,%r10
+	andq	%rbx,%r11
+
+
+	addq	%r12,%r8	# sum of both masked terms, kept at rsp+64 for the recombination step
+	adcq	%r13,%r9
+	adcq	%r14,%r10
+	adcq	%r15,%r11
+	movq	%r8,64(%rsp)
+	movq	%r9,72(%rsp)
+	movq	%r10,80(%rsp)
+	movq	%r11,88(%rsp)
+
+
+	movq	0+0(%rsp),%rdx	# 4x4-limb product of the low 256 bits of both sums, written over rsp+0..63 as the inputs are consumed
+	mulxq	32+0(%rsp),%r9,%r8
+	movq	%r9,0+0(%rsp)
+	mulxq	32+8(%rsp),%r10,%r9
+	xorq	%rax,%rax	# rax = 0 and CF = OF = 0 for the adcx/adox chains
+	adoxq	%r10,%r8
+	mulxq	32+16(%rsp),%r11,%r10
+	adoxq	%r11,%r9
+	mulxq	32+24(%rsp),%r12,%r11
+	adoxq	%r12,%r10
+
+	movq	0+8(%rsp),%rdx
+	mulxq	32+0(%rsp),%r12,%r13
+	adoxq	%rax,%r11
+	xorq	%rax,%rax
+	mulxq	32+8(%rsp),%r15,%r14
+	adoxq	%r8,%r12
+	movq	%r12,0+8(%rsp)
+	adcxq	%r15,%r13
+	mulxq	32+16(%rsp),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r9,%r13
+	mulxq	32+24(%rsp),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r10,%r14
+
+	movq	0+16(%rsp),%rdx
+	mulxq	32+0(%rsp),%r8,%r9
+	adoxq	%r11,%r15
+	adoxq	%rax,%rbx
+	xorq	%rax,%rax
+	mulxq	32+8(%rsp),%r11,%r10
+	adoxq	%r13,%r8
+	movq	%r8,0+16(%rsp)
+	adcxq	%r11,%r9
+	mulxq	32+16(%rsp),%r12,%r11
+	adcxq	%r12,%r10
+	adoxq	%r14,%r9
+	mulxq	32+24(%rsp),%rbp,%r12
+	adcxq	%rbp,%r11
+	adcxq	%rax,%r12
+
+	adoxq	%r15,%r10
+	adoxq	%rbx,%r11
+	adoxq	%rax,%r12
+
+	movq	0+24(%rsp),%rdx
+	mulxq	32+0(%rsp),%r8,%r13
+	xorq	%rax,%rax
+	mulxq	32+8(%rsp),%r15,%r14
+	adcxq	%r15,%r13
+	adoxq	%r8,%r9
+	mulxq	32+16(%rsp),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r13,%r10
+	mulxq	32+24(%rsp),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r14,%r11
+	adoxq	%r15,%r12
+	adoxq	%rax,%rbx
+	movq	%r9,0+24(%rsp)
+	movq	%r10,0+32(%rsp)	# the high product limbs replace BL + BH, which is no longer needed
+	movq	%r11,0+40(%rsp)
+	movq	%r12,0+48(%rsp)
+	movq	%rbx,0+56(%rsp)
+
+
+
+	movq	0+0(%rdi),%rdx	# AL * BL, 4x4 limbs, written to c[0..7]
+	mulxq	0+0(%rsi),%r9,%r8
+	movq	%r9,0+0(%rcx)
+	mulxq	0+8(%rsi),%r10,%r9
+	xorq	%rax,%rax
+	adoxq	%r10,%r8
+	mulxq	0+16(%rsi),%r11,%r10
+	adoxq	%r11,%r9
+	mulxq	0+24(%rsi),%r12,%r11
+	adoxq	%r12,%r10
+
+	movq	0+8(%rdi),%rdx
+	mulxq	0+0(%rsi),%r12,%r13
+	adoxq	%rax,%r11
+	xorq	%rax,%rax
+	mulxq	0+8(%rsi),%r15,%r14
+	adoxq	%r8,%r12
+	movq	%r12,0+8(%rcx)
+	adcxq	%r15,%r13
+	mulxq	0+16(%rsi),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r9,%r13
+	mulxq	0+24(%rsi),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r10,%r14
+
+	movq	0+16(%rdi),%rdx
+	mulxq	0+0(%rsi),%r8,%r9
+	adoxq	%r11,%r15
+	adoxq	%rax,%rbx
+	xorq	%rax,%rax
+	mulxq	0+8(%rsi),%r11,%r10
+	adoxq	%r13,%r8
+	movq	%r8,0+16(%rcx)
+	adcxq	%r11,%r9
+	mulxq	0+16(%rsi),%r12,%r11
+	adcxq	%r12,%r10
+	adoxq	%r14,%r9
+	mulxq	0+24(%rsi),%rbp,%r12
+	adcxq	%rbp,%r11
+	adcxq	%rax,%r12
+
+	adoxq	%r15,%r10
+	adoxq	%rbx,%r11
+	adoxq	%rax,%r12
+
+	movq	0+24(%rdi),%rdx
+	mulxq	0+0(%rsi),%r8,%r13
+	xorq	%rax,%rax
+	mulxq	0+8(%rsi),%r15,%r14
+	adcxq	%r15,%r13
+	adoxq	%r8,%r9
+	mulxq	0+16(%rsi),%rbx,%r15
+	adcxq	%rbx,%r14
+	adoxq	%r13,%r10
+	mulxq	0+24(%rsi),%rbp,%rbx
+	adcxq	%rbp,%r15
+	adcxq	%rax,%rbx
+	adoxq	%r14,%r11
+	adoxq	%r15,%r12
+	adoxq	%rax,%rbx
+	movq	%r9,0+24(%rcx)
+	movq	%r10,0+32(%rcx)
+	movq	%r11,0+40(%rcx)
+	movq	%r12,0+48(%rcx)
+	movq	%rbx,0+56(%rcx)
+
+
+
+	movq	32+0(%rdi),%rdx	# AH * BH, 3x3 limbs, written to c[8..13]
+	mulxq	32+0(%rsi),%r9,%r8
+	movq	%r9,64+0(%rcx)
+	mulxq	32+8(%rsi),%r10,%r9
+	xorq	%rax,%rax
+	adoxq	%r10,%r8
+	mulxq	32+16(%rsi),%r11,%r10
+	adoxq	%r11,%r9
+
+	movq	32+8(%rdi),%rdx
+	mulxq	32+0(%rsi),%r12,%r11
+	adoxq	%rax,%r10
+	xorq	%rax,%rax
+
+	mulxq	32+8(%rsi),%r14,%r13
+	adoxq	%r8,%r12
+	movq	%r12,64+8(%rcx)
+	adcxq	%r14,%r11
+
+	mulxq	32+16(%rsi),%r8,%r14
+	adoxq	%r9,%r11
+	adcxq	%r8,%r13
+	adcxq	%rax,%r14
+	adoxq	%r10,%r13
+
+	movq	32+16(%rdi),%rdx
+	mulxq	32+0(%rsi),%r8,%r9
+	adoxq	%rax,%r14
+	xorq	%rax,%rax
+
+	mulxq	32+8(%rsi),%r10,%r12
+	adoxq	%r11,%r8
+	movq	%r8,64+16(%rcx)
+	adcxq	%r13,%r9
+
+	mulxq	32+16(%rsi),%r11,%r8
+	adcxq	%r14,%r12
+	adcxq	%rax,%r8
+	adoxq	%r10,%r9
+	adoxq	%r12,%r11
+	adoxq	%rax,%r8
+	movq	%r9,64+24(%rcx)
+	movq	%r11,64+32(%rcx)
+	movq	%r8,64+40(%rcx)
+
+
+
+
+	movq	64(%rsp),%r8	# r8..r11 = masked cross terms + upper four limbs of the middle product
+	movq	72(%rsp),%r9
+	movq	80(%rsp),%r10
+	movq	88(%rsp),%r11
+
+	movq	32(%rsp),%rax
+	addq	%rax,%r8
+	movq	40(%rsp),%rax
+	adcq	%rax,%r9
+	movq	48(%rsp),%rax
+	adcq	%rax,%r10
+	movq	56(%rsp),%rax
+	adcq	%rax,%r11
+
+
+	movq	0(%rsp),%r12	# r12..r15 = lower four limbs of the middle product
+	movq	8(%rsp),%r13
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r15
+	subq	0(%rcx),%r12	# subtract AL * BL (c[0..7]) from the 8-limb middle value r12..r15, r8..r11
+	sbbq	8(%rcx),%r13
+	sbbq	16(%rcx),%r14
+	sbbq	24(%rcx),%r15
+	sbbq	32(%rcx),%r8
+	sbbq	40(%rcx),%r9
+	sbbq	48(%rcx),%r10
+	sbbq	56(%rcx),%r11
+
+
+	subq	64(%rcx),%r12	# subtract AH * BH (c[8..13]), it has only six limbs so the top two take the borrow alone
+	sbbq	72(%rcx),%r13
+	sbbq	80(%rcx),%r14
+	sbbq	88(%rcx),%r15
+	sbbq	96(%rcx),%r8
+	sbbq	104(%rcx),%r9
+	sbbq	$0x0,%r10
+	sbbq	$0x0,%r11
+
+	addq	32(%rcx),%r12	# add the middle term into c starting at limb 4 (byte offset 32)
+	movq	%r12,32(%rcx)
+	adcq	40(%rcx),%r13
+	movq	%r13,40(%rcx)
+	adcq	48(%rcx),%r14
+	movq	%r14,48(%rcx)
+	adcq	56(%rcx),%r15
+	movq	%r15,56(%rcx)
+	adcq	64(%rcx),%r8
+	movq	%r8,64(%rcx)
+	adcq	72(%rcx),%r9
+	movq	%r9,72(%rcx)
+	adcq	80(%rcx),%r10
+	movq	%r10,80(%rcx)
+	adcq	88(%rcx),%r11
+	movq	%r11,88(%rcx)
+	movq	96(%rcx),%r12	# ripple the carry through c[12] and c[13]
+	adcq	$0x0,%r12
+	movq	%r12,96(%rcx)
+	movq	104(%rcx),%r13
+	adcq	$0x0,%r13
+	movq	%r13,104(%rcx)
+
+	addq	$96,%rsp	# release the scratch area
+.cfi_adjust_cfa_offset	-96
+	popq	%rbp
+.cfi_adjust_cfa_offset	-8
+.cfi_same_value	rbp
+	popq	%rbx
+.cfi_adjust_cfa_offset	-8
+.cfi_same_value	rbx
+
+
+	popq	%r15
+.cfi_adjust_cfa_offset	-8
+	popq	%r14
+.cfi_adjust_cfa_offset	-8
+	popq	%r13
+.cfi_adjust_cfa_offset	-8
+	popq	%r12
+.cfi_adjust_cfa_offset	-8
+	.byte	0xf3,0xc3	# hand-encoded rep ret
+.cfi_endproc
diff --git a/src/kem/sike/p434/fp_generic.c b/src/kem/sike/p434/fp_generic.c
new file mode 100644
index 00000000..02e851cf
--- /dev/null
+++ b/src/kem/sike/p434/fp_generic.c
@@ -0,0 +1,173 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: portable modular arithmetic for P434
+*********************************************************************************************/
+#include "utils.h"
+#include "fpx.h"
+
+// Global constants
+extern const struct params_t params;
+
+static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
+{ // Digit multiplication, digit * digit -> 2-digit result: c[0] receives the low word and c[1] the high word.
+    crypto_word_t al, ah, bl, bh, temp;
+    crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+    crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);   // Ones in the low half of a word (sizeof*4 = half the bit width)
+    crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4);  // Ones in the high half of a word
+
+    al = a & mask_low;                              // Low part
+    ah = a >> (sizeof(crypto_word_t) * 4);          // High part
+    bl = b & mask_low;
+    bh = b >> (sizeof(crypto_word_t) * 4);
+
+    albl = al*bl;                                   // Four half-word partial products, each fits in one word
+    albh = al*bh;
+    ahbl = ah*bl;
+    ahbh = ah*bh;
+    c[0] = albl & mask_low;                         // C00
+
+    res1 = albl >> (sizeof(crypto_word_t) * 4);     // Pieces that land in the upper half of c[0]
+    res2 = ahbl & mask_low;
+    res3 = albh & mask_low;
+    temp = res1 + res2 + res3;
+    carry = temp >> (sizeof(crypto_word_t) * 4);    // Overflow of that sum, carried into c[1]
+    c[0] ^= temp << (sizeof(crypto_word_t) * 4);    // C01
+
+    res1 = ahbl >> (sizeof(crypto_word_t) * 4);     // Pieces that land in the lower half of c[1]
+    res2 = albh >> (sizeof(crypto_word_t) * 4);
+    res3 = ahbh & mask_low;
+    temp = res1 + res2 + res3 + carry;
+    c[1] = temp & mask_low;                         // C10
+    carry = temp & mask_high;                       // Kept in place (not shifted) so it adds straight into the high half
+    c[1] ^= (ahbh & mask_high) + carry;             // C11
+}
+
+void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
+{ // Modular addition, c = a+b mod p434.
+  // Inputs: a, b in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    unsigned int i, carry = 0;
+    crypto_word_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = a + b
+        ADDC(carry, a[i], b[i], carry, c[i]);
+    }
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = c - 2*p434, carry ends up holding the borrow
+        SUBC(carry, c[i], params.prime_x2[i], carry, c[i]);
+    }
+    mask = 0 - (crypto_word_t)carry;                // All-ones if the subtraction borrowed, else 0
+
+    carry = 0;
+    for (i = 0; i < NWORDS_FIELD; i++) {            // Branch-free correction: add 2*p434 back only when mask is set
+        ADDC(carry, c[i], params.prime_x2[i] & mask, carry, c[i]);
+    }
+}
+
+void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
+{ // Modular subtraction, c = a-b mod p434.
+  // Inputs: a, b in [0, 2*p434-1]
+  // Output: c in [0, 2*p434-1]
+    unsigned int i, borrow = 0;
+    crypto_word_t mask;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // c = a - b
+        SUBC(borrow, a[i], b[i], borrow, c[i]);
+    }
+    mask = 0 - (crypto_word_t)borrow;               // All-ones if a < b, else 0
+
+    borrow = 0;                                     // Reused as the carry of the correction below
+    for (i = 0; i < NWORDS_FIELD; i++) {            // Branch-free correction: add 2*p434 only when mask is set
+        ADDC(borrow, c[i], params.prime_x2[i] & mask, borrow, c[i]);
+    }
+}
+
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
+{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. c receives 2*NWORDS_FIELD words.
+    unsigned int i, j;
+    crypto_word_t t = 0, u = 0, v = 0, UV[2];       // (t, u, v) is a three-word column accumulator, v being the lowest word
+    unsigned int carry = 0;
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // Lower columns: all products a[j]*b[i-j] with j <= i
+        for (j = 0; j <= i; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);
+            ADDC(0, UV[0], v, carry, v);
+            ADDC(carry, UV[1], u, carry, u);
+            t += carry;
+        }
+        c[i] = v;                                   // Emit the finished column and shift the accumulator down one word
+        v = u;
+        u = t;
+        t = 0;
+    }
+
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {     // Upper columns: j starts where i-j is still a valid index of b
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            MUL(a[j], b[i-j], UV+1, UV[0]);
+            ADDC(0, UV[0], v, carry, v);
+            ADDC(carry, UV[1], u, carry, u);
+            t += carry;
+        }
+        c[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    c[2*NWORDS_FIELD-1] = v;                        // What remains in the accumulator is the top word
+}
+
+void sike_fprdc(const felm_t ma, felm_t mc)
+{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
+  // mc = ma*R^-1 mod p434x2, where R = 2^448.
+  // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
+  // ma is assumed to be in Montgomery representation. NOTE(review): ma is indexed up to 2*NWORDS_FIELD-1 below, i.e. read as a double-length value despite the felm_t parameter type.
+    unsigned int i, j, carry, count = ZERO_WORDS;
+    crypto_word_t UV[2], t = 0, u = 0, v = 0;       // (t, u, v) is a three-word column accumulator, v being the lowest word
+
+    for (i = 0; i < NWORDS_FIELD; i++) {
+        mc[i] = 0;
+    }
+
+    for (i = 0; i < NWORDS_FIELD; i++) {            // First half: each column i produces mc[i]
+        for (j = 0; j < i; j++) {
+            if (j < (i-ZERO_WORDS+1)) {             // NOTE(review): i is unsigned, so this bound looks like it wraps for i < ZERO_WORDS-1; presumably harmless if the low ZERO_WORDS words of prime_p1 are zero - confirm
+                MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v);
+                ADDC(carry, UV[1], u, carry, u);
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);                // Fold in word i of the input
+        ADDC(carry, u, 0, carry, u);
+        t += carry;
+        mc[i] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+
+    for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) {     // Second half: each column i overwrites mc[i-NWORDS_FIELD] with a result word
+        if (count > 0) {
+            count -= 1;
+        }
+        for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) {
+            if (j < (NWORDS_FIELD-count)) {
+                MUL(mc[j], params.prime_p1[i-j], UV+1, UV[0]);
+                ADDC(0, UV[0], v, carry, v);
+                ADDC(carry, UV[1], u, carry, u);
+                t += carry;
+            }
+        }
+        ADDC(0, v, ma[i], carry, v);
+        ADDC(carry, u, 0, carry, u);
+        t += carry;
+        mc[i-NWORDS_FIELD] = v;
+        v = u;
+        u = t;
+        t = 0;
+    }
+    ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v);     // Top input word plus the leftover accumulator gives the last result word
+    mc[NWORDS_FIELD-1] = v;
+}
diff --git a/src/kem/sike/p434/fpx.c b/src/kem/sike/p434/fpx.c
new file mode 100644
index 00000000..30233406
--- /dev/null
+++ b/src/kem/sike/p434/fpx.c
@@ -0,0 +1,282 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: core functions over GF(p) and GF(p^2)
+*********************************************************************************************/
+#include <stddef.h>
+#include "utils.h"
+#include "fpx.h"
+
+extern const struct params_t params;
+
+// Squaring using Montgomery arithmetic: mc = sike_fprdc(ma * ma), the same multiply-then-reduce pipeline as sike_fpmul_mont.
+static void fpsqr_mont(const felm_t ma, felm_t mc)
+{
+    dfelm_t temp = {0};         // Double-length product buffer
+    sike_mpmul(ma, ma, temp);
+    sike_fprdc(temp, mc);
+}
+
+// Chain to compute a^(p-3)/4 using Montgomery arithmetic. The result replaces a in place.
+static void fpinv_chain_mont(felm_t a)
+{
+    unsigned int i, j;
+    felm_t t[31], tt;
+
+    // Precomputed table of odd powers: t[i] = a^(2*i+3) for i = 0..30
+    fpsqr_mont(a, tt);                              // tt = a^2
+    sike_fpmul_mont(a, tt, t[0]);                   // t[0] = a^3
+    for (i = 0; i <= 29; i++) sike_fpmul_mont(t[i], tt, t[i+1]);    // each entry is the previous one times a^2
+
+    sike_fpcopy(a, tt);                             // Fixed square-and-multiply schedule starts from a
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[5], tt, tt);
+    for (i = 0; i < 10; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[14], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[3], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[23], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[13], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[7], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[12], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[21], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[2], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[19], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[1], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[24], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[16], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[0], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[20], tt, tt);
+    for (i = 0; i < 8; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[9], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[25], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[30], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[26], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(a, tt, tt);
+    for (i = 0; i < 7; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[28], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[6], tt, tt);
+    for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[10], tt, tt);
+    for (i = 0; i < 9; i++) fpsqr_mont(tt, tt);
+    sike_fpmul_mont(t[22], tt, tt);
+    for (j = 0; j < 35; j++) {                      // Tail: 35 identical windows of six squarings times t[30]
+        for (i = 0; i < 6; i++) fpsqr_mont(tt, tt);
+        sike_fpmul_mont(t[30], tt, tt);
+    }
+    sike_fpcopy(tt, a);                             // Write the result back over the input
+}
+
+// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. Computed in place as a^(p-2).
+static void fpinv_mont(felm_t a)
+{
+    felm_t tt = {0};
+    sike_fpcopy(a, tt);
+    fpinv_chain_mont(tt);               // tt = a^((p-3)/4), per the header comment of the chain
+    fpsqr_mont(tt, tt);                 // Two squarings raise the exponent to p-3
+    fpsqr_mont(tt, tt);
+    sike_fpmul_mont(a, tt, a);          // One more factor of a gives a^(p-2)
+}
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. Portable helpers, compiled only when no assembly backend is selected.
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
+    uint8_t carry = 0;
+    for (size_t i = 0; i < nwords; i++) {
+        ADDC(carry, a[i], b[i], carry, c[i]);
+    }
+    return carry;
+}
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. Callers in this file also pass 2*NWORDS_FIELD-word operands through the felm_t parameters.
+inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) {
+    uint32_t borrow = 0;
+    for (size_t i = 0; i < nwords; i++) {
+        SUBC(borrow, a[i], b[i], borrow, c[i]);
+    }
+    return borrow;
+}
+#endif
+
+// Multiprecision addition, c = a+b over NWORDS_FIELD words, with no modular reduction. The carry out is discarded.
+inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c)
+{
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+    mp_add(a, b, c, NWORDS_FIELD);      // Portable path
+#else
+    sike_mpadd_asm(a, b, c);            // Assembly path for x86-64 and AArch64 builds
+#endif
+}
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
+// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0
+inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+    return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD));   // Turn the borrow bit into a full-word mask
+#else
+    return sike_mpsubx2_asm(a, b, c);                               // The assembly routine returns the mask directly
+#endif
+}
+
+// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
+// Inputs should be s.t. c > a and c > b. Borrows are not reported to the caller on either path.
+inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) {
+#if defined(ARCH_GENERIC) || (!defined(ARCH_X86_64) && !defined(ARCH_AARCH64))
+    mp_sub(c, a, c, 2*NWORDS_FIELD);    // c = c - a
+    mp_sub(c, b, c, 2*NWORDS_FIELD);    // c = c - b
+#else
+    sike_mpdblsubx2_asm(a, b, c);       // Both subtractions in one assembly routine
+#endif
+}
+
+// Copy a field element, c = a (NWORDS_FIELD words, copied word by word).
+void sike_fpcopy(const felm_t a, felm_t c) {
+    for (size_t i = 0; i < NWORDS_FIELD; i++) {
+        c[i] = a[i];
+    }
+}
+
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod prime, where R=2^448 (the value documented on the generic sike_fprdc)
+void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc)
+{
+    dfelm_t temp = {0};         // Double-length product buffer
+    sike_mpmul(ma, mb, temp);   // temp = ma * mb, unreduced
+    sike_fprdc(temp, mc);       // mc = Montgomery reduction of temp
+}
+
+// Leave the Montgomery domain: c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+// Implemented as a Montgomery multiplication by the plain integer 1 followed by a modular correction.
+void sike_from_mont(const felm_t ma, felm_t c)
+{
+    felm_t unit = {1};                  // least significant word is 1, all remaining words are zero
+
+    sike_fpmul_mont(ma, unit, c);
+    // Bring the product from [0, 2*p-1] down to [0, p-1].
+    sike_fpcorrection(c);
+}
+
+// GF(p^2) squaring using Montgomery arithmetic, c = a^2, via (a0+a1*i)^2 = (a0+a1)*(a0-a1) + 2*a0*a1*i.
+// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1]
+// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
+void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) {
+    felm_t dbl_a0, diff, sum;
+
+    mp_addfast(a->c0, a->c0, dbl_a0);                  // dbl_a0 = 2a0
+    sike_fpsub(a->c0, a->c1, diff);                    // diff = a0-a1
+    mp_addfast(a->c0, a->c1, sum);                     // sum = a0+a1
+    sike_fpmul_mont(sum, diff, c->c0);                 // c0 = (a0+a1)(a0-a1)
+    sike_fpmul_mont(dbl_a0, a->c1, c->c1);             // c1 = 2a0*a1
+}
+
+// Modular negation in place, a = -a mod p434, computed as 2*p434 - a.
+// Input/output: a in [0, 2*p434-1]
+void sike_fpneg(felm_t a) {
+  uint32_t bw = 0;
+  for (size_t k = 0; k != NWORDS_FIELD; ++k) {
+    SUBC(bw, params.prime_x2[k], a[k], bw, a[k]);    // a[k] = (2*p434)[k] - a[k] - bw
+  }
+}
+
+// Modular halving, c = a/2 mod p434.
+// Input : a in [0, 2*p434-1]
+// Output: c in [0, 2*p434-1]
+void sike_fpdiv2(const felm_t a, felm_t c) {
+  // All-ones when a is odd, zero when a is even; selects whether p434 is added below.
+  const crypto_word_t odd_mask = 0 - (crypto_word_t)(a[0] & 1);
+  uint32_t cy = 0;
+
+  for (size_t k = 0; k != NWORDS_FIELD; ++k) {
+    ADDC(cy, a[k], params.prime[k] & odd_mask, cy, c[k]);    // c = a + (p434 & odd_mask)
+  }
+
+  // Shift the whole value right by one bit, pulling in the low bit of the next word.
+  for (size_t k = 0; k + 1 < NWORDS_FIELD; ++k) {
+    c[k] = (c[k] >> 1) ^ (c[k+1] << (RADIX - 1));
+  }
+  c[NWORDS_FIELD-1] >>= 1;
+}
+
+// Modular correction: reduce field element a from [0, 2*p434-1] to [0, p434-1], in place.
+void sike_fpcorrection(felm_t a) {
+  uint32_t bw = 0;
+  uint32_t cy = 0;
+
+  for (size_t k = 0; k != NWORDS_FIELD; ++k) {
+    SUBC(bw, a[k], params.prime[k], bw, a[k]);               // a = a - p434
+  }
+
+  // A final borrow means a was below p434: turn it into an all-ones mask and add p434 back.
+  const crypto_word_t addback = 0 - (crypto_word_t)bw;
+  for (size_t k = 0; k != NWORDS_FIELD; ++k) {
+    ADDC(cy, a[k], params.prime[k] & addback, cy, a[k]);
+  }
+}
+
+// GF(p^2) multiplication using Montgomery arithmetic, c = a*b, built from three multiprecision products.
+// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1]
+// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1]
+void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) {
+    felm_t sum_a, sum_b;
+    dfelm_t prod00, prod11, cross;
+    crypto_word_t neg_mask;
+
+    mp_addfast(a->c0, a->c1, sum_a);                   // sum_a = a0+a1
+    mp_addfast(b->c0, b->c1, sum_b);                   // sum_b = b0+b1
+    sike_mpmul(a->c0, b->c0, prod00);                  // prod00 = a0*b0
+    sike_mpmul(a->c1, b->c1, prod11);                  // prod11 = a1*b1
+    sike_mpmul(sum_a, sum_b, cross);                   // cross = (a0+a1)*(b0+b1)
+    mp_dblsubfast(prod00, prod11, cross);              // cross = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+    neg_mask = mp_subfast(prod00, prod11, prod00);     // prod00 = a0*b0 - a1*b1. neg_mask = 0xFF..F if that is negative, else 0x00..0
+
+    for (size_t k = 0; k != NWORDS_FIELD; ++k) {
+        sum_a[k] = params.prime[k] & neg_mask;         // sum_a is reused: prime if the difference was negative, 0 otherwise
+    }
+
+    sike_fprdc(cross, c->c1);                          // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+    mp_addfast(&prod00[NWORDS_FIELD], sum_a, &prod00[NWORDS_FIELD]);   // add the masked prime to the upper half of prod00
+    sike_fprdc(prod00, c->c0);                         // c[0] = a0*b0 - a1*b1
+}
+
+// GF(p^2) inversion using Montgomery arithmetic, in place: a = (a0-i*a1)/(a0^2+a1^2).
+void sike_fp2inv_mont(f2elm_t a) {
+    f2elm_t norm;
+
+    fpsqr_mont(a->c1, norm->c1);                       // norm1 = a1^2
+    fpsqr_mont(a->c0, norm->c0);                       // norm0 = a0^2
+    sike_fpadd(norm->c0, norm->c1, norm->c0);          // norm0 = a0^2+a1^2
+    fpinv_mont(norm->c0);                              // norm0 = (a0^2+a1^2)^-1
+    sike_fpneg(a->c1);                                 // a = a0-i*a1
+    sike_fpmul_mont(a->c1, norm->c0, a->c1);
+    sike_fpmul_mont(a->c0, norm->c0, a->c0);           // a = (a0-i*a1)*(a0^2+a1^2)^-1
+}
diff --git a/src/kem/sike/p434/fpx.h b/src/kem/sike/p434/fpx.h
new file mode 100644
index 00000000..b9255ac7
--- /dev/null
+++ b/src/kem/sike/p434/fpx.h
@@ -0,0 +1,112 @@
+#ifndef FPX_H_
+#define FPX_H_
+
+#include "utils.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// Modular addition, c = a+b mod p.
+void sike_fpadd(const felm_t a, const felm_t b, felm_t c);
+// Modular subtraction, c = a-b mod p.
+void sike_fpsub(const felm_t a, const felm_t b, felm_t c);
+// Modular division by two, c = a/2 mod p.
+void sike_fpdiv2(const felm_t a, felm_t c);
+// Modular correction to reduce field element a in [0, 2*p-1] to [0, p-1].
+void sike_fpcorrection(felm_t a);
+// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c);
+// 434-bit Montgomery reduction, c = a*R^-1 mod p
+void sike_fprdc(const dfelm_t a, felm_t c);
+// Double 2x434-bit multiprecision subtraction, c = c-a-b
+void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c);
+// Multiprecision subtraction, c = a-b
+crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c);
+// 434-bit multiprecision addition, c = a+b
+void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c);
+// Modular negation, a = -a mod p.
+void sike_fpneg(felm_t a);
+// Copy of a field element, c = a
+void sike_fpcopy(const felm_t a, felm_t c);
+// Zero a field element, a = 0.
+void sike_fpzero(felm_t a);
+// If option = 0xFF...FF then x and y are swapped (x=y; y=x), otherwise no swap happens. Constant time.
+void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option);
+// Conversion from Montgomery representation to standard representation,
+// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1].
+void sike_from_mont(const felm_t ma, felm_t c);
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448
+void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc);
+// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2)
+void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c);
+// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void sike_fp2inv_mont(f2elm_t a);
+// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2).
+void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c);
+// Modular correction, a = a in GF(p^2).
+void sike_fp2correction(f2elm_t a);
+
+#if defined(__cplusplus)
+}  // extern C
+#endif
+
+// GF(p^2) addition, c = a+b in GF(p^2). Arguments are parenthesized; each one is expanded twice.
+#define sike_fp2add(a, b, c)                     \
+do {                                             \
+    sike_fpadd((a)->c0, (b)->c0, (c)->c0);       \
+    sike_fpadd((a)->c1, (b)->c1, (c)->c1);       \
+} while(0)
+
+// GF(p^2) subtraction, c = a-b in GF(p^2). Arguments are parenthesized; each one is expanded twice.
+#define sike_fp2sub(a,b,c)                       \
+do {                                             \
+    sike_fpsub((a)->c0, (b)->c0, (c)->c0);       \
+    sike_fpsub((a)->c1, (b)->c1, (c)->c1);       \
+} while(0)
+
+// Copy a GF(p^2) element, c = a. Arguments are parenthesized; each one is expanded twice.
+#define sike_fp2copy(a, c)                       \
+do {                                             \
+    sike_fpcopy((a)->c0, (c)->c0);               \
+    sike_fpcopy((a)->c1, (c)->c1);               \
+} while(0)
+
+// GF(p^2) negation, a = -a in GF(p^2). The argument is parenthesized and expanded twice.
+#define sike_fp2neg(a)                           \
+do {                                             \
+    sike_fpneg((a)->c0);                         \
+    sike_fpneg((a)->c1);                         \
+} while(0)
+
+// GF(p^2) division by two, c = a/2 in GF(p^2). Arguments are parenthesized; each one is expanded twice.
+#define sike_fp2div2(a, c)                       \
+do {                                             \
+    sike_fpdiv2((a)->c0, (c)->c0);               \
+    sike_fpdiv2((a)->c1, (c)->c1);               \
+} while(0)
+
+// Modular correction of both components, a = a in GF(p^2). The argument is parenthesized and expanded twice.
+#define sike_fp2correction(a)                    \
+do {                                             \
+    sike_fpcorrection((a)->c0);                  \
+    sike_fpcorrection((a)->c1);                  \
+} while(0)
+
+// Conversion of a GF(p^2) element to Montgomery representation,
+// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). Arguments are parenthesized; each one is expanded twice.
+#define sike_to_fp2mont(a, mc)                              \
+do {                                                        \
+    sike_fpmul_mont((a)->c0, params.mont_R2, (mc)->c0);     \
+    sike_fpmul_mont((a)->c1, params.mont_R2, (mc)->c1);     \
+} while(0)
+
+// Conversion of a GF(p^2) element from Montgomery representation to standard representation,
+// c_i = ma_i*R^(-1) = a_i in GF(p^2). Arguments are parenthesized; each one is expanded twice.
+#define sike_from_fp2mont(ma, c)                 \
+do {                                             \
+    sike_from_mont((ma)->c0, (c)->c0);           \
+    sike_from_mont((ma)->c1, (c)->c1);           \
+} while(0)
+
+#endif // FPX_H_
diff --git a/src/kem/sike/p434/isogeny.c b/src/kem/sike/p434/isogeny.c
new file mode 100644
index 00000000..661410e4
--- /dev/null
+++ b/src/kem/sike/p434/isogeny.c
@@ -0,0 +1,262 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: elliptic curve and isogeny functions
+*********************************************************************************************/
+#include <stddef.h>
+#include <string.h>
+#include "utils.h"
+#include "isogeny.h"
+#include "fpx.h"
+
+static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
+{ // Montgomery point doubling in projective (X:Z) coordinates.
+  // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C.
+  // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
+    f2elm_t dsq, ssq, gap, scaled;
+
+    sike_fp2sub(P->X, P->Z, dsq);                        // dsq = X1-Z1
+    sike_fp2add(P->X, P->Z, ssq);                        // ssq = X1+Z1
+    sike_fp2sqr_mont(dsq, dsq);                          // dsq = (X1-Z1)^2
+    sike_fp2sqr_mont(ssq, ssq);                          // ssq = (X1+Z1)^2
+    sike_fp2mul_mont(C24, dsq, Q->Z);                    // Z2 = C24*(X1-Z1)^2
+    sike_fp2mul_mont(ssq, Q->Z, Q->X);                   // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
+    sike_fp2sub(ssq, dsq, gap);                          // gap = (X1+Z1)^2-(X1-Z1)^2
+    sike_fp2mul_mont(A24plus, gap, scaled);              // scaled = A24plus*[(X1+Z1)^2-(X1-Z1)^2]
+    sike_fp2add(Q->Z, scaled, Q->Z);                     // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
+    sike_fp2mul_mont(Q->Z, gap, Q->Z);                   // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
+}
+
+void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e)
+{ // Repeated doubling: computes [2^e](X:Z) on a Montgomery curve with projective constant by calling xDBL e times.
+  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C.
+  // Output: projective Montgomery x-coordinates Q <- (2^e)*P.
+
+    memmove(Q, P, sizeof(*P));                           // start from Q = P
+    for (size_t left = e; left != 0; left--) {
+        xDBL(Q, Q, A24plus, C24);                        // Q <- 2*Q
+    }
+}
+
+void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff)
+{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+  // Input:  projective point of order four P = (X4:Z4).
+  // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C, returned as A24plus = 4*X4^4 and C24 = 4*Z4^4,
+  //         and the 3 coefficients coeff[0] = 4*Z4^2, coeff[1] = X4-Z4, coeff[2] = X4+Z4 that are used to evaluate the isogeny at a point in eval_4_isog().
+
+    sike_fp2sub(P->X, P->Z, coeff[1]);                   // coeff[1] = X4-Z4
+    sike_fp2add(P->X, P->Z, coeff[2]);                   // coeff[2] = X4+Z4
+    sike_fp2sqr_mont(P->Z, coeff[0]);                    // coeff[0] = Z4^2
+    sike_fp2add(coeff[0], coeff[0], coeff[0]);           // coeff[0] = 2*Z4^2
+    sike_fp2sqr_mont(coeff[0], C24);                     // C24 = (2*Z4^2)^2 = 4*Z4^4
+    sike_fp2add(coeff[0], coeff[0], coeff[0]);           // coeff[0] = 4*Z4^2
+    sike_fp2sqr_mont(P->X, A24plus);                     // A24plus = X4^2
+    sike_fp2add(A24plus, A24plus, A24plus);              // A24plus = 2*X4^2
+    sike_fp2sqr_mont(A24plus, A24plus);                  // A24plus = (2*X4^2)^2 = 4*X4^4
+}
+
+void eval_4_isog(point_proj_t P, f2elm_t* coeff)
+{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
+  // by the 3 coefficients in coeff (computed in the function get_4_isog()).
+  // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
+  // Output: the projective point P = phi(P) = (X:Z) in the codomain. P is overwritten; in the step comments X and Z on the right-hand side denote the input coordinates.
+    f2elm_t t0, t1;
+
+    sike_fp2add(P->X, P->Z, t0);                         // t0 = X+Z
+    sike_fp2sub(P->X, P->Z, t1);                         // t1 = X-Z
+    sike_fp2mul_mont(t0, coeff[1], P->X);                // X = (X+Z)*coeff[1]
+    sike_fp2mul_mont(t1, coeff[2], P->Z);                // Z = (X-Z)*coeff[2]
+    sike_fp2mul_mont(t0, t1, t0);                        // t0 = (X+Z)*(X-Z)
+    sike_fp2mul_mont(t0, coeff[0], t0);                  // t0 = coeff[0]*(X+Z)*(X-Z)
+    sike_fp2add(P->X, P->Z, t1);                         // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
+    sike_fp2sub(P->X, P->Z, P->Z);                       // Z = (X+Z)*coeff[1] - (X-Z)*coeff[2]
+    sike_fp2sqr_mont(t1, t1);                            // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+    sike_fp2sqr_mont(P->Z, P->Z);                        // Z = [(X+Z)*coeff[1] - (X-Z)*coeff[2]]^2
+    sike_fp2add(t1, t0, P->X);                           // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
+    sike_fp2sub(P->Z, t0, t0);                           // t0 = [(X+Z)*coeff[1] - (X-Z)*coeff[2]]^2 - coeff[0]*(X+Z)*(X-Z)
+    sike_fp2mul_mont(P->X, t1, P->X);                    // Xfinal = (current X)*t1
+    sike_fp2mul_mont(P->Z, t0, P->Z);                    // Zfinal = (current Z)*t0
+}
+
+
+void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
+{ // Tripling of a Montgomery point in projective coordinates (X:Z).
+  // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+  // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
+    f2elm_t t0, t1, t2, t3, t4, t5, t6;
+
+    sike_fp2sub(P->X, P->Z, t0);                         // t0 = X-Z
+    sike_fp2sqr_mont(t0, t2);                            // t2 = (X-Z)^2
+    sike_fp2add(P->X, P->Z, t1);                         // t1 = X+Z
+    sike_fp2sqr_mont(t1, t3);                            // t3 = (X+Z)^2
+    sike_fp2add(t0, t1, t4);                             // t4 = 2*X
+    sike_fp2sub(t1, t0, t0);                             // t0 = 2*Z
+    sike_fp2sqr_mont(t4, t1);                            // t1 = 4*X^2
+    sike_fp2sub(t1, t3, t1);                             // t1 = 4*X^2 - (X+Z)^2
+    sike_fp2sub(t1, t2, t1);                             // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+    sike_fp2mul_mont(t3, A24plus, t5);                   // t5 = A24plus*(X+Z)^2
+    sike_fp2mul_mont(t3, t5, t3);                        // t3 = (X+Z)^2 * t5 = A24plus*(X+Z)^4
+    sike_fp2mul_mont(A24minus, t2, t6);                  // t6 = A24minus*(X-Z)^2
+    sike_fp2mul_mont(t2, t6, t2);                        // t2 = (X-Z)^2 * t6 = A24minus*(X-Z)^4
+    sike_fp2sub(t2, t3, t3);                             // t3 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4
+    sike_fp2sub(t5, t6, t2);                             // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
+    sike_fp2mul_mont(t1, t2, t1);                        // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
+    sike_fp2add(t3, t1, t2);                             // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^4 - A24plus*(X+Z)^4
+    sike_fp2sqr_mont(t2, t2);                            // t2 = t2^2
+    sike_fp2mul_mont(t4, t2, Q->X);                      // X3 = 2*X*t2
+    sike_fp2sub(t3, t1, t1);                             // t1 = A24minus*(X-Z)^4 - A24plus*(X+Z)^4 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
+    sike_fp2sqr_mont(t1, t1);                            // t1 = t1^2
+    sike_fp2mul_mont(t0, t1, Q->Z);                      // Z3 = 2*Z*t1
+}
+
+void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e)
+{ // Repeated tripling: computes [3^e](X:Z) on a Montgomery curve with projective constant by calling xTPL e times.
+  // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
+  // Output: projective Montgomery x-coordinates Q <- (3^e)*P.
+    memmove(Q, P, sizeof(*P));                           // start from Q = P
+    for (size_t left = e; left != 0; left--) {
+        xTPL(Q, Q, A24minus, A24plus);                   // Q <- 3*Q
+    }
+}
+
+void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff)
+{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+  // Input:  projective point of order three P = (X3:Z3); written as (X:Z) in the step comments below.
+  // Output: the 3-isogenous Montgomery curve with projective coefficient A/C, returned as A24minus and A24plus, and the 2 coefficients coeff[0] = X-Z, coeff[1] = X+Z.
+    f2elm_t t0, t1, t2, t3, t4;
+
+    sike_fp2sub(P->X, P->Z, coeff[0]);                   // coeff0 = X-Z
+    sike_fp2sqr_mont(coeff[0], t0);                      // t0 = (X-Z)^2
+    sike_fp2add(P->X, P->Z, coeff[1]);                   // coeff1 = X+Z
+    sike_fp2sqr_mont(coeff[1], t1);                      // t1 = (X+Z)^2
+    sike_fp2add(t0, t1, t2);                             // t2 = (X+Z)^2 + (X-Z)^2
+    sike_fp2add(coeff[0], coeff[1], t3);                 // t3 = 2*X
+    sike_fp2sqr_mont(t3, t3);                            // t3 = 4*X^2
+    sike_fp2sub(t3, t2, t3);                             // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
+    sike_fp2add(t1, t3, t2);                             // t2 = 4*X^2 - (X-Z)^2
+    sike_fp2add(t3, t0, t3);                             // t3 = 4*X^2 - (X+Z)^2
+    sike_fp2add(t0, t3, t4);                             // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
+    sike_fp2add(t4, t4, t4);                             // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
+    sike_fp2add(t1, t4, t4);                             // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
+    sike_fp2mul_mont(t2, t4, A24minus);                  // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+    sike_fp2add(t1, t2, t4);                             // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
+    sike_fp2add(t4, t4, t4);                             // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
+    sike_fp2add(t0, t4, t4);                             // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
+    sike_fp2mul_mont(t3, t4, t4);                        // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+    sike_fp2sub(t4, A24minus, t0);                       // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
+    sike_fp2add(A24minus, t0, A24plus);                  // A24plus = A24minus + t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
+}
+
+
+void eval_3_isog(point_proj_t Q, f2elm_t* coeff)
+{ // Evaluates a 3-isogeny phi at the projective point Q = (X:Z), in place.
+  // The isogeny is described by the 2 coefficients in coeff (computed in the function get_3_isog()).
+  // Inputs: the coefficients coeff[0], coeff[1] and the projective point Q = (X:Z).
+  // Output: the projective point Q <- phi(Q) = (X3:Z3).
+    f2elm_t lhs, rhs, both;
+
+    sike_fp2sub(Q->X, Q->Z, rhs);                      // rhs = X-Z
+    sike_fp2add(Q->X, Q->Z, lhs);                      // lhs = X+Z
+    sike_fp2mul_mont(rhs, coeff[1], rhs);              // rhs = coeff1*(X-Z)
+    sike_fp2mul_mont(lhs, coeff[0], lhs);              // lhs = coeff0*(X+Z)
+    sike_fp2add(lhs, rhs, both);                       // both = coeff0*(X+Z) + coeff1*(X-Z)
+    sike_fp2sub(rhs, lhs, lhs);                        // lhs = coeff1*(X-Z) - coeff0*(X+Z)
+    sike_fp2sqr_mont(lhs, lhs);                        // lhs = [coeff1*(X-Z) - coeff0*(X+Z)]^2
+    sike_fp2sqr_mont(both, both);                      // both = [coeff0*(X+Z) + coeff1*(X-Z)]^2
+    sike_fp2mul_mont(Q->Z, lhs, Q->Z);                 // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2
+    sike_fp2mul_mont(Q->X, both, Q->X);                // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2
+}
+
+
+void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3)
+{ // Simultaneous inversion of three elements using a single GF(p^2) inversion.
+  // Input:  z1,z2,z3
+  // Output: 1/z1,1/z2,1/z3 (override inputs).
+    f2elm_t z12, inv_all, inv_z12, inv_z1;
+
+    sike_fp2mul_mont(z1, z2, z12);                     // z12 = z1*z2
+    sike_fp2mul_mont(z3, z12, inv_all);                // inv_all = z1*z2*z3
+    sike_fp2inv_mont(inv_all);                         // inv_all = 1/(z1*z2*z3)
+    sike_fp2mul_mont(z3, inv_all, inv_z12);            // inv_z12 = 1/(z1*z2)
+    sike_fp2mul_mont(inv_z12, z2, inv_z1);             // inv_z1 = 1/z1
+    sike_fp2mul_mont(inv_z12, z1, z2);                 // z2 = 1/z2
+    sike_fp2mul_mont(z12, inv_all, z3);                // z3 = 1/z3
+    sike_fp2copy(inv_z1, z1);                          // z1 = 1/z1 (stored last because z1 was still read above)
+}
+
+
+void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A)
+{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+  // Input:  the x-coordinates xP, xQ, and xR of the points P, Q and R.
+  // Output: the coefficient A = (xP*xQ + xR*(xP+xQ) - 1)^2/(4*xP*xQ*xR) - (xP+xQ+xR) corresponding to the curve E_A: y^2=x^3+A*x^2+x.
+    f2elm_t t0, t1, one = F2ELM_INIT;
+
+    extern const struct params_t params;
+    sike_fpcopy(params.mont_one, one->c0);             // one.c0 = params.mont_one; one.c1 keeps its F2ELM_INIT value (presumably 0 - confirm in utils.h)
+    sike_fp2add(xP, xQ, t1);                           // t1 = xP+xQ
+    sike_fp2mul_mont(xP, xQ, t0);                      // t0 = xP*xQ
+    sike_fp2mul_mont(xR, t1, A);                       // A = xR*(xP+xQ)
+    sike_fp2add(t0, A, A);                             // A = xP*xQ + xR*(xP+xQ)
+    sike_fp2mul_mont(t0, xR, t0);                      // t0 = xP*xQ*xR
+    sike_fp2sub(A, one, A);                            // A = xP*xQ + xR*(xP+xQ) - 1
+    sike_fp2add(t0, t0, t0);                           // t0 = 2*xP*xQ*xR
+    sike_fp2add(t1, xR, t1);                           // t1 = xP+xQ+xR
+    sike_fp2add(t0, t0, t0);                           // t0 = 4*xP*xQ*xR
+    sike_fp2sqr_mont(A, A);                            // A = (xP*xQ + xR*(xP+xQ) - 1)^2
+    sike_fp2inv_mont(t0);                              // t0 = 1/(4*xP*xQ*xR)
+    sike_fp2mul_mont(A, t0, A);                        // A = (xP*xQ + xR*(xP+xQ) - 1)^2/(4*xP*xQ*xR)
+    sike_fp2sub(A, t1, A);                             // Afinal = A - (xP+xQ+xR)
+}
+
+
+void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv)
+{ // Computes the j-invariant of a Montgomery curve with projective constant.
+  // Input: A,C in GF(p^2).
+  // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
+    f2elm_t t0, t1;
+
+    sike_fp2sqr_mont(A, jinv);                           // jinv = A^2
+    sike_fp2sqr_mont(C, t1);                             // t1 = C^2
+    sike_fp2add(t1, t1, t0);                             // t0 = 2*C^2
+    sike_fp2sub(jinv, t0, t0);                           // t0 = A^2 - 2*C^2
+    sike_fp2sub(t0, t1, t0);                             // t0 = A^2 - 3*C^2
+    sike_fp2sub(t0, t1, jinv);                           // jinv = A^2 - 4*C^2
+    sike_fp2sqr_mont(t1, t1);                            // t1 = C^4
+    sike_fp2mul_mont(jinv, t1, jinv);                    // jinv = C^4*(A^2 - 4*C^2)
+    sike_fp2add(t0, t0, t0);                             // t0 = 2*(A^2 - 3*C^2)
+    sike_fp2add(t0, t0, t0);                             // t0 = 4*(A^2 - 3*C^2)
+    sike_fp2sqr_mont(t0, t1);                            // t1 = 16*(A^2 - 3*C^2)^2
+    sike_fp2mul_mont(t0, t1, t0);                        // t0 = 64*(A^2 - 3*C^2)^3
+    sike_fp2add(t0, t0, t0);                             // t0 = 128*(A^2 - 3*C^2)^3
+    sike_fp2add(t0, t0, t0);                             // t0 = 256*(A^2 - 3*C^2)^3
+    sike_fp2inv_mont(jinv);                              // jinv = 1/(C^4*(A^2 - 4*C^2))
+    sike_fp2mul_mont(jinv, t0, jinv);                    // jinv = 256*(A^2 - 3*C^2)^3/(C^4*(A^2 - 4*C^2))
+}
+
+
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
+{ // Simultaneous doubling and differential addition.
+  // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
+  // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP. Both P and Q are overwritten.
+    f2elm_t t0, t1, t2;
+
+    sike_fp2add(P->X, P->Z, t0);                         // t0 = XP+ZP
+    sike_fp2sub(P->X, P->Z, t1);                         // t1 = XP-ZP
+    sike_fp2sqr_mont(t0, P->X);                          // XP = (XP+ZP)^2
+    sike_fp2sub(Q->X, Q->Z, t2);                         // t2 = XQ-ZQ
+    sike_fp2correction(t2);                              // modular correction of both components of t2 before it is multiplied
+    sike_fp2add(Q->X, Q->Z, Q->X);                       // XQ = XQ+ZQ
+    sike_fp2mul_mont(t0, t2, t0);                        // t0 = (XP+ZP)*(XQ-ZQ)
+    sike_fp2sqr_mont(t1, P->Z);                          // ZP = (XP-ZP)^2
+    sike_fp2mul_mont(t1, Q->X, t1);                      // t1 = (XP-ZP)*(XQ+ZQ)
+    sike_fp2sub(P->X, P->Z, t2);                         // t2 = (XP+ZP)^2-(XP-ZP)^2
+    sike_fp2mul_mont(P->X, P->Z, P->X);                  // XP = (XP+ZP)^2*(XP-ZP)^2
+    sike_fp2mul_mont(t2, A24, Q->X);                     // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] (XQ is used as scratch here)
+    sike_fp2sub(t0, t1, Q->Z);                           // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+    sike_fp2add(Q->X, P->Z, P->Z);                       // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
+    sike_fp2add(t0, t1, Q->X);                           // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+    sike_fp2mul_mont(P->Z, t2, P->Z);                    // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
+    sike_fp2sqr_mont(Q->Z, Q->Z);                        // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+    sike_fp2sqr_mont(Q->X, Q->X);                        // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+    sike_fp2mul_mont(Q->Z, xPQ, Q->Z);                   // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+}
diff --git a/src/kem/sike/p434/isogeny.h b/src/kem/sike/p434/isogeny.h
new file mode 100644
index 00000000..460c8c66
--- /dev/null
+++ b/src/kem/sike/p434/isogeny.h
@@ -0,0 +1,49 @@
+#ifndef ISOGENY_H_
+#define ISOGENY_H_
+
+// Computes [2^e](X:Z) on Montgomery curve with projective
+// constant via e repeated doublings.
+void xDBLe(
+    const point_proj_t P, point_proj_t Q, const f2elm_t A24plus,
+    const f2elm_t C24, size_t e);
+// Simultaneous doubling and differential addition.
+void xDBLADD(
+    point_proj_t P, point_proj_t Q, const f2elm_t xPQ,
+    const f2elm_t A24);
+// Tripling of a Montgomery point in projective coordinates (X:Z).
+void xTPL(
+    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
+    const f2elm_t A24plus);
+// Computes [3^e](X:Z) on Montgomery curve with projective constant
+// via e repeated triplings.
+void xTPLe(
+    const point_proj_t P, point_proj_t Q, const f2elm_t A24minus,
+    const f2elm_t A24plus, size_t e);
+// Given the x-coordinates of P, Q, and R, returns the value A
+// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+void get_A(
+    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A);
+// Computes the j-invariant of a Montgomery curve with projective constant.
+void j_inv(
+    const f2elm_t A, const f2elm_t C, f2elm_t jinv);
+// Computes the corresponding 4-isogeny of a projective Montgomery
+// point (X4:Z4) of order 4.
+void get_4_isog(
+    const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff);
+// Computes the corresponding 3-isogeny of a projective Montgomery
+// point (X3:Z3) of order 3.
+void get_3_isog(
+    const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus,
+    f2elm_t* coeff);
+// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3)
+// of order 3 on a Montgomery curve and a point P with coefficients given in coeff.
+void eval_3_isog(
+    point_proj_t Q, f2elm_t* coeff);
+// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny.
+void eval_4_isog(
+    point_proj_t P, f2elm_t* coeff);
+// 3-way simultaneous inversion
+void inv_3_way(
+    f2elm_t z1, f2elm_t z2, f2elm_t z3);
+
+#endif // ISOGENY_H_
diff --git a/src/kem/sike/p434/params.c b/src/kem/sike/p434/params.c
new file mode 100644
index 00000000..b13f4c87
--- /dev/null
+++ b/src/kem/sike/p434/params.c
@@ -0,0 +1,128 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: supersingular isogeny parameters and generation of functions for P434
+*********************************************************************************************/
+
+#include "utils.h"
+
+// Parameters for isogeny system "SIKE"
+const struct params_t params = {
+    .prime = {
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFDC1767AE2FFFFFF),
+        U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
+        U64_TO_WORDS(0x0002341F27177344)
+    },
+    .prime_p1 = {
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xFDC1767AE3000000),
+        U64_TO_WORDS(0x7BC65C783158AEA3), U64_TO_WORDS(0x6CFC5FD681C52056),
+        U64_TO_WORDS(0x0002341F27177344)
+    },
+    .prime_x2 = {
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF),
+        U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFB82ECF5C5FFFFFF),
+        U64_TO_WORDS(0xF78CB8F062B15D47), U64_TO_WORDS(0xD9F8BFAD038A40AC),
+        U64_TO_WORDS(0x0004683E4E2EE688)
+    },
+    .A_gen = {
+        U64_TO_WORDS(0x05ADF455C5C345BF), U64_TO_WORDS(0x91935C5CC767AC2B),
+        U64_TO_WORDS(0xAFE4E879951F0257), U64_TO_WORDS(0x70E792DC89FA27B1),
+        U64_TO_WORDS(0xF797F526BB48C8CD), U64_TO_WORDS(0x2181DB6131AF621F),
+        U64_TO_WORDS(0x00000A1C08B1ECC4), // XPA0
+        U64_TO_WORDS(0x74840EB87CDA7788), U64_TO_WORDS(0x2971AA0ECF9F9D0B),
+        U64_TO_WORDS(0xCB5732BDF41715D5), U64_TO_WORDS(0x8CD8E51F7AACFFAA),
+        U64_TO_WORDS(0xA7F424730D7E419F), U64_TO_WORDS(0xD671EB919A179E8C),
+        U64_TO_WORDS(0x0000FFA26C5A924A), // XPA1
+        U64_TO_WORDS(0xFEC6E64588B7273B), U64_TO_WORDS(0xD2A626D74CBBF1C6),
+        U64_TO_WORDS(0xF8F58F07A78098C7), U64_TO_WORDS(0xE23941F470841B03),
+        U64_TO_WORDS(0x1B63EDA2045538DD), U64_TO_WORDS(0x735CFEB0FFD49215),
+        U64_TO_WORDS(0x0001C4CB77542876), // XQA0
+        U64_TO_WORDS(0xADB0F733C17FFDD6), U64_TO_WORDS(0x6AFFBD037DA0A050),
+        U64_TO_WORDS(0x680EC43DB144E02F), U64_TO_WORDS(0x1E2E5D5FF524E374),
+        U64_TO_WORDS(0xE2DDA115260E2995), U64_TO_WORDS(0xA6E4B552E2EDE508),
+        U64_TO_WORDS(0x00018ECCDDF4B53E), // XQA1
+        U64_TO_WORDS(0x01BA4DB518CD6C7D), U64_TO_WORDS(0x2CB0251FE3CC0611),
+        U64_TO_WORDS(0x259B0C6949A9121B), U64_TO_WORDS(0x60E17AC16D2F82AD),
+        U64_TO_WORDS(0x3AA41F1CE175D92D), U64_TO_WORDS(0x413FBE6A9B9BC4F3),
+        U64_TO_WORDS(0x00022A81D8D55643), // XRA0
+        U64_TO_WORDS(0xB8ADBC70FC82E54A), U64_TO_WORDS(0xEF9CDDB0D5FADDED),
+        U64_TO_WORDS(0x5820C734C80096A0), U64_TO_WORDS(0x7799994BAA96E0E4),
+        U64_TO_WORDS(0x044961599E379AF8), U64_TO_WORDS(0xDB2B94FBF09F27E2),
+        U64_TO_WORDS(0x0000B87FC716C0C6)  // XRA1
+    },
+    .B_gen = {
+        U64_TO_WORDS(0x6E5497556EDD48A3), U64_TO_WORDS(0x2A61B501546F1C05),
+        U64_TO_WORDS(0xEB919446D049887D), U64_TO_WORDS(0x5864A4A69D450C4F),
+        U64_TO_WORDS(0xB883F276A6490D2B), U64_TO_WORDS(0x22CC287022D5F5B9),
+        U64_TO_WORDS(0x0001BED4772E551F), // XPB0
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), // XPB1
+        U64_TO_WORDS(0xFAE2A3F93D8B6B8E), U64_TO_WORDS(0x494871F51700FE1C),
+        U64_TO_WORDS(0xEF1A94228413C27C), U64_TO_WORDS(0x498FF4A4AF60BD62),
+        U64_TO_WORDS(0xB00AD2A708267E8A), U64_TO_WORDS(0xF4328294E017837F),
+        U64_TO_WORDS(0x000034080181D8AE), // XQB0
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), // XQB1
+        U64_TO_WORDS(0x283B34FAFEFDC8E4), U64_TO_WORDS(0x9208F44977C3E647),
+        U64_TO_WORDS(0x7DEAE962816F4E9A), U64_TO_WORDS(0x68A2BA8AA262EC9D),
+        U64_TO_WORDS(0x8176F112EA43F45B), U64_TO_WORDS(0x02106D022634F504),
+        U64_TO_WORDS(0x00007E8A50F02E37), // XRB0
+        U64_TO_WORDS(0xB378B7C1DA22CCB1), U64_TO_WORDS(0x6D089C99AD1D9230),
+        U64_TO_WORDS(0xEBE15711813E2369), U64_TO_WORDS(0x2B35A68239D48A53),
+        U64_TO_WORDS(0x445F6FD138407C93), U64_TO_WORDS(0xBEF93B29A3F6B54B),
+        U64_TO_WORDS(0x000173FA910377D3)  // XRB1
+    },
+    .mont_R2 = {
+        U64_TO_WORDS(0x28E55B65DCD69B30), U64_TO_WORDS(0xACEC7367768798C2),
+        U64_TO_WORDS(0xAB27973F8311688D), U64_TO_WORDS(0x175CC6AF8D6C7C0B),
+        U64_TO_WORDS(0xABCD92BF2DDE347E), U64_TO_WORDS(0x69E16A61C7686D9A),
+        U64_TO_WORDS(0x000025A89BCDD12A)
+    },
+    .mont_one = {
+        U64_TO_WORDS(0x000000000000742C), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB90FF404FC000000),
+        U64_TO_WORDS(0xD801A4FB559FACD4), U64_TO_WORDS(0xE93254545F77410C),
+        U64_TO_WORDS(0x0000ECEEA7BD2EDA)
+    },
+    .mont_six = {
+        U64_TO_WORDS(0x000000000002B90A), U64_TO_WORDS(0x0000000000000000),
+        U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x5ADCCB2822000000),
+        U64_TO_WORDS(0x187D24F39F0CAFB4), U64_TO_WORDS(0x9D353A4D394145A0),
+        U64_TO_WORDS(0x00012559A0403298)
+    },
+    .A_strat = {
+        0x30, 0x1C, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+        0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01,
+        0x02, 0x01, 0x01, 0x0D, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02,
+        0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01,
+        0x15, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
+        0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01,
+        0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03,
+        0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04,
+        0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01
+    },
+    .B_strat = {
+        0x42, 0x21, 0x11, 0x09, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01,
+        0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x01,
+        0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02,
+        0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x10,
+        0x08, 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01,
+        0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+        0x01, 0x20, 0x10, 0x08, 0x04, 0x03, 0x01, 0x01, 0x01, 0x01,
+        0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01,
+        0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02,
+        0x01, 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01,
+        0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01,
+        0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04,
+        0x02, 0x01, 0x01, 0x02, 0x01, 0x01
+    }
+};
diff --git a/src/kem/sike/p434/sike.c b/src/kem/sike/p434/sike.c
new file mode 100644
index 00000000..f52fe5c4
--- /dev/null
+++ b/src/kem/sike/p434/sike.c
@@ -0,0 +1,522 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: supersingular isogeny key encapsulation (SIKE) protocol
+*********************************************************************************************/
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <randombytes.h>
+#include <common/fips202.h>
+
+#include "utils.h"
+#include "isogeny.h"
+#include "fpx.h"
+
+extern const struct params_t params;
+
+// SIDH_JINV_BYTESZ is a number of bytes used for encoding j-invariant.
+#define SIDH_JINV_BYTESZ    110U
+// SIDH_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny)
+#define SIDH_PRV_A_BITSZ    216U
+// SIDH_PRV_B_BITSZ is the number of bits of SIDH private key (3-isogeny)
+#define SIDH_PRV_B_BITSZ    217U
+// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation
+#define MAX_INT_POINTS_ALICE    7U
+// MAX_INT_POINTS_BOB is the number of points used in 3-isogeny tree computation
+#define MAX_INT_POINTS_BOB      8U
+
+// Swap points.
+// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+#if !defined(ARCH_X86_64) || defined(ARCH_GENERIC)
+static void sike_cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
+{
+    crypto_word_t temp;
+    for (size_t i = 0; i < NWORDS_FIELD; i++) {
+        temp = option & (P->X->c0[i] ^ Q->X->c0[i]);
+        P->X->c0[i] = temp ^ P->X->c0[i];
+        Q->X->c0[i] = temp ^ Q->X->c0[i];
+        temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]);
+        P->Z->c0[i] = temp ^ P->Z->c0[i];
+        Q->Z->c0[i] = temp ^ Q->Z->c0[i];
+        temp = option & (P->X->c1[i] ^ Q->X->c1[i]);
+        P->X->c1[i] = temp ^ P->X->c1[i];
+        Q->X->c1[i] = temp ^ Q->X->c1[i];
+        temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]);
+        P->Z->c1[i] = temp ^ P->Z->c1[i];
+        Q->Z->c1[i] = temp ^ Q->Z->c1[i];
+    }
+}
+#endif
+
+// Swap points.
+// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option)
+{
+#if defined(ARCH_X86_64) && !defined(ARCH_GENERIC)
+    sike_cswap_asm(P, Q, option);
+#else
+    sike_cswap(P, Q, option);
+#endif
+}
+
+static void ladder3Pt(
+    const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m,
+    int is_A, point_proj_t R, const f2elm_t A) {
+    point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT;
+    f2elm_t A24 = F2ELM_INIT;
+    crypto_word_t mask;
+    int bit, swap, prevbit = 0;
+
+    const size_t nbits = is_A?SIDH_PRV_A_BITSZ:SIDH_PRV_B_BITSZ;
+
+    // Initializing constant
+    sike_fpcopy(params.mont_one, A24[0].c0);
+    sike_fp2add(A24, A24, A24);
+    sike_fp2add(A, A24, A24);
+    sike_fp2div2(A24, A24);
+    sike_fp2div2(A24, A24); // A24 = (A+2)/4
+
+    // Initializing points
+    sike_fp2copy(xQ, R0->X);
+    sike_fpcopy(params.mont_one, R0->Z[0].c0);
+    sike_fp2copy(xPQ, R2->X);
+    sike_fpcopy(params.mont_one, R2->Z[0].c0);
+    sike_fp2copy(xP, R->X);
+    sike_fpcopy(params.mont_one, R->Z[0].c0);
+    memset(R->Z->c1, 0, sizeof(R->Z->c1));
+
+    // Main loop
+    for (size_t i = 0; i < nbits; i++) {
+        bit = (m[i >> 3] >> (i & 7)) & 1;
+        swap = bit ^ prevbit;
+        prevbit = bit;
+        mask = 0 - (crypto_word_t)swap;
+
+        sike_fp2cswap(R, R2, mask);
+        xDBLADD(R0, R2, R->X, A24);
+        sike_fp2mul_mont(R2->X, R->Z, R2->X);
+    }
+    swap = 0 ^ prevbit;
+    mask = 0 - (crypto_word_t)swap;
+    sike_fp2cswap(R, R2, mask);
+}
+
+// Initialization of basis points
+static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) {
+    sike_fpcopy(gen,                  XP->c0);
+    sike_fpcopy(gen +   NWORDS_FIELD, XP->c1);
+    sike_fpcopy(gen + 2*NWORDS_FIELD, XQ->c0);
+    sike_fpcopy(gen + 3*NWORDS_FIELD, XQ->c1);
+    sike_fpcopy(gen + 4*NWORDS_FIELD, XR->c0);
+    sike_fpcopy(gen + 5*NWORDS_FIELD, XR->c1);
+}
+
+// Conversion of GF(p^2) element from Montgomery to standard representation.
+static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) {
+    f2elm_t t;
+    sike_from_fp2mont(x, t);
+
+    // convert to bytes in little endian form
+    for (size_t i=0; i<FIELD_BYTESZ; i++) {
+        enc[i+           0] = (t[0].c0[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
+        enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF;
+    }
+}
+
+// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation.
+// Elements over GF(p434) are encoded in FIELD_BYTESZ (55) octets in little endian format
+// (i.e., the least significant octet is located in the lowest memory address).
+static inline void fp2_decode(const uint8_t *enc, f2elm_t t) {
+    memset(t[0].c0, 0, sizeof(t[0].c0));
+    memset(t[0].c1, 0, sizeof(t[0].c1));
+    // convert bytes in little endian form to f2elm_t
+    for (size_t i = 0; i < FIELD_BYTESZ; i++) {
+        t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+           0]) << (8*(i%LSZ));
+        t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (8*(i%LSZ));
+    }
+    sike_to_fp2mont(t, t);
+}
+
+// Alice's ephemeral public key generation
+// Input:  a private key skA in the range [0, 2^216 - 1]; only the low SIDH_PRV_A_BITSZ bits are read.
+// Output: the public key pkA consisting of 3 GF(p434^2) elements encoded in 330 bytes.
+static void gen_iso_A(const uint8_t* skA, uint8_t* pkA)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_ALICE];
+    point_proj_t phiP = POINT_PROJ_INIT;
+    point_proj_t phiQ = POINT_PROJ_INIT;
+    point_proj_t phiR = POINT_PROJ_INIT;
+    f2elm_t XPA, XQA, XRA, coeff[3];
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t C24 = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
+
+    // Initialize basis points
+    sike_init_basis(params.A_gen, XPA, XQA, XRA);
+    sike_init_basis(params.B_gen, phiP->X, phiQ->X, phiR->X);
+    sike_fpcopy(params.mont_one, (phiP->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiR->Z)->c0);
+
+    // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1
+    sike_fpcopy(params.mont_one, A24plus->c0);
+    sike_fp2add(A24plus, A24plus, A24plus);
+    sike_fp2add(A24plus, A24plus, C24);
+    sike_fp2add(A24plus, C24, A);
+    sike_fp2add(C24, C24, A24plus);
+
+    // Retrieve kernel point
+    ladder3Pt(XPA, XQA, XRA, skA, 1, R, A);
+
+    // Traverse tree
+    index = 0;
+    for (size_t row = 1; row < A_max; row++) {
+        while (index < A_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X);
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.A_strat[ii++];
+            xDBLe(R, R, A24plus, C24, (2*m));
+            index += m;
+        }
+        get_4_isog(R, A24plus, C24, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_4_isog(pts[i], coeff);
+        }
+        eval_4_isog(phiP, coeff);
+        eval_4_isog(phiQ, coeff);
+        eval_4_isog(phiR, coeff);
+
+        sike_fp2copy(pts[npts-1]->X, R->X);
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_4_isog(R, A24plus, C24, coeff);
+    eval_4_isog(phiP, coeff);
+    eval_4_isog(phiQ, coeff);
+    eval_4_isog(phiR, coeff);
+
+    inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
+    sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
+    sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+    sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+    // Format public key
+    sike_fp2_encode(phiP->X, pkA);
+    sike_fp2_encode(phiQ->X, pkA + SIDH_JINV_BYTESZ);
+    sike_fp2_encode(phiR->X, pkA + 2*SIDH_JINV_BYTESZ);
+}
+
+// Bob's ephemeral public key generation
+// It takes the private key skB and computes the public key pkB.
+// The private key is an integer in the range [0, 2^217 - 1]; only the low SIDH_PRV_B_BITSZ bits are read.
+// The public key consists of 3 GF(p434^2) elements encoded in 330 bytes.
+static void gen_iso_B(const uint8_t* skB, uint8_t* pkB)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_BOB];
+    point_proj_t phiP = POINT_PROJ_INIT;
+    point_proj_t phiQ = POINT_PROJ_INIT;
+    point_proj_t phiR = POINT_PROJ_INIT;
+    f2elm_t XPB, XQB, XRB, coeff[3];
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t A24minus = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+
+    // Initialize basis points
+    sike_init_basis(params.B_gen, XPB, XQB, XRB);
+    sike_init_basis(params.A_gen, phiP->X, phiQ->X, phiR->X);
+    sike_fpcopy(params.mont_one, (phiP->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiQ->Z)->c0);
+    sike_fpcopy(params.mont_one, (phiR->Z)->c0);
+
+    // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1
+    sike_fpcopy(params.mont_one, A24plus->c0);
+    sike_fp2add(A24plus, A24plus, A24plus);
+    sike_fp2add(A24plus, A24plus, A24minus);
+    sike_fp2add(A24plus, A24minus, A);
+    sike_fp2add(A24minus, A24minus, A24plus);
+
+    // Retrieve kernel point
+    ladder3Pt(XPB, XQB, XRB, skB, 0, R, A);
+
+    // Traverse tree
+    index = 0;
+    for (size_t row = 1; row < B_max; row++) {
+        while (index < B_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X);
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.B_strat[ii++];
+            xTPLe(R, R, A24minus, A24plus, m);
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_3_isog(pts[i], coeff);
+        }
+        eval_3_isog(phiP, coeff);
+        eval_3_isog(phiQ, coeff);
+        eval_3_isog(phiR, coeff);
+
+        sike_fp2copy(pts[npts-1]->X, R->X);
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_3_isog(R, A24minus, A24plus, coeff);
+    eval_3_isog(phiP, coeff);
+    eval_3_isog(phiQ, coeff);
+    eval_3_isog(phiR, coeff);
+
+    inv_3_way(phiP->Z, phiQ->Z, phiR->Z);
+    sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X);
+    sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
+    sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X);
+
+    // Format public key
+    sike_fp2_encode(phiP->X, pkB);
+    sike_fp2_encode(phiQ->X, pkB + SIDH_JINV_BYTESZ);
+    sike_fp2_encode(phiR->X, pkB + 2*SIDH_JINV_BYTESZ);
+}
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB
+// Inputs: Alice's skA is an integer in the range [0, 2^216 - 1]; only the low SIDH_PRV_A_BITSZ bits are read.
+//         Bob's pkB consists of 3 GF(p434^2) elements encoded in 330 bytes.
+// Output: a shared secret ssA that consists of one element in GF(p434^2) encoded in 110 bytes.
+static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_ALICE];
+    f2elm_t coeff[3], PKB[3], jinv;
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t C24 = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;
+
+    // Initialize images of Bob's basis
+    fp2_decode(pkB, PKB[0]);
+    fp2_decode(pkB + SIDH_JINV_BYTESZ, PKB[1]);
+    fp2_decode(pkB + 2*SIDH_JINV_BYTESZ, PKB[2]);
+
+    // Initialize constants
+    get_A(PKB[0], PKB[1], PKB[2], A);
+    sike_fpadd(params.mont_one, params.mont_one, C24->c0);
+    sike_fp2add(A, C24, A24plus);
+    sike_fpadd(C24->c0, C24->c0, C24->c0);
+
+    // Retrieve kernel point
+    ladder3Pt(PKB[0], PKB[1], PKB[2], skA, 1, R, A);
+
+    // Traverse tree
+    index = 0;
+    for (size_t row = 1; row < A_max; row++) {
+        while (index < A_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X);
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.A_strat[ii++];
+            xDBLe(R, R, A24plus, C24, (2*m));
+            index += m;
+        }
+        get_4_isog(R, A24plus, C24, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_4_isog(pts[i], coeff);
+        }
+
+        sike_fp2copy(pts[npts-1]->X, R->X);
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_4_isog(R, A24plus, C24, coeff);
+    sike_fp2add(A24plus, A24plus, A24plus);
+    sike_fp2sub(A24plus, C24, A24plus);
+    sike_fp2add(A24plus, A24plus, A24plus);
+    j_inv(A24plus, C24, jinv);
+    sike_fp2_encode(jinv, ssA);
+}
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA
+// Inputs: Bob's skB is an integer in the range [0, 2^217 - 1]; only the low SIDH_PRV_B_BITSZ bits are read.
+//         Alice's pkA consists of 3 GF(p434^2) elements encoded in 330 bytes.
+// Output: a shared secret ssB that consists of one element in GF(p434^2) encoded in 110 bytes.
+static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB)
+{
+    point_proj_t R, pts[MAX_INT_POINTS_BOB];
+    f2elm_t coeff[3], PKB[3], jinv;
+    f2elm_t A24plus = F2ELM_INIT;
+    f2elm_t A24minus = F2ELM_INIT;
+    f2elm_t A = F2ELM_INIT;
+    unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;
+
+    // Initialize images of Alice's basis
+    fp2_decode(pkA, PKB[0]);
+    fp2_decode(pkA + SIDH_JINV_BYTESZ, PKB[1]);
+    fp2_decode(pkA + 2*SIDH_JINV_BYTESZ, PKB[2]);
+
+    // Initialize constants
+    get_A(PKB[0], PKB[1], PKB[2], A);
+    sike_fpadd(params.mont_one, params.mont_one, A24minus->c0);
+    sike_fp2add(A, A24minus, A24plus);
+    sike_fp2sub(A, A24minus, A24minus);
+
+    // Retrieve kernel point
+    ladder3Pt(PKB[0], PKB[1], PKB[2], skB, 0, R, A);
+
+    // Traverse tree
+    index = 0;
+    for (size_t row = 1; row < B_max; row++) {
+        while (index < B_max-row) {
+            sike_fp2copy(R->X, pts[npts]->X);
+            sike_fp2copy(R->Z, pts[npts]->Z);
+            pts_index[npts++] = index;
+            m = params.B_strat[ii++];
+            xTPLe(R, R, A24minus, A24plus, m);
+            index += m;
+        }
+        get_3_isog(R, A24minus, A24plus, coeff);
+
+        for (size_t i = 0; i < npts; i++) {
+            eval_3_isog(pts[i], coeff);
+        }
+
+        sike_fp2copy(pts[npts-1]->X, R->X);
+        sike_fp2copy(pts[npts-1]->Z, R->Z);
+        index = pts_index[npts-1];
+        npts -= 1;
+    }
+
+    get_3_isog(R, A24minus, A24plus, coeff);
+    sike_fp2add(A24plus, A24minus, A);
+    sike_fp2add(A, A, A);
+    sike_fp2sub(A24plus, A24minus, A24plus);
+    j_inv(A, A24plus, jinv);
+    sike_fp2_encode(jinv, ssB);
+}
+
+int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
+                 uint8_t out_pub[SIKE_PUB_BYTESZ]) {
+  // Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and <
+  // 253 bits
+  randombytes(out_priv, SIKE_PRV_BYTESZ);
+  out_priv[31] = (out_priv[31] | 0x01) & 0x03;
+
+  gen_iso_B(out_priv, out_pub);
+  return 1;
+}
+
+void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
+                 uint8_t out_ciphertext[SIKE_CT_BYTESZ],
+                 const uint8_t pub_key[SIKE_PUB_BYTESZ]) {
+  // Secret buffer is reused by the function to store some ephemeral
+  // secret data. It's size must be maximum of 64,
+  // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
+  uint8_t secret[32]; // OZAPTF, why?
+  uint8_t j[SIDH_JINV_BYTESZ];
+  uint8_t temp[SIKE_MSG_BYTESZ + SIKE_CT_BYTESZ];
+  shake256incctx ctx;
+
+  // Generate secret key for A
+  // secret key A = SHA256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ
+  randombytes(temp, SIKE_MSG_BYTESZ);
+
+  shake256_inc_init(&ctx);
+  shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ);
+  shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ);
+  shake256_inc_finalize(&ctx);
+  shake256_inc_squeeze(secret, 32, &ctx);
+  shake256_inc_ctx_release(&ctx);
+
+  // Generate public key for A - first part of the ciphertext
+  gen_iso_A(secret, out_ciphertext);
+
+  // Generate c1:
+  //  h = SHA256(j-invariant)
+  // c1 = h ^ m
+  ex_iso_A(secret, pub_key, j);
+  shake256(secret, sizeof secret, j, sizeof j);
+
+  // c1 = h ^ m
+  uint8_t *c1 = &out_ciphertext[SIKE_PUB_BYTESZ];
+  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+    c1[i] = temp[i] ^ secret[i];
+  }
+
+  shake256_inc_init(&ctx);
+  shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ);
+  shake256_inc_absorb(&ctx, out_ciphertext, SIKE_CT_BYTESZ);
+  shake256_inc_finalize(&ctx);
+  shake256_inc_squeeze(secret, 32, &ctx);
+  shake256_inc_ctx_release(&ctx);
+  // Generate shared secret out_shared_key = SHA256(m||out_ciphertext)
+  memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
+}
+
+void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
+                 const uint8_t ciphertext[SIKE_CT_BYTESZ],
+                 const uint8_t pub_key[SIKE_PUB_BYTESZ],
+                 const uint8_t priv_key[SIKE_PRV_BYTESZ]) {
+  // Secret buffer is reused by the function to store some ephemeral
+  // secret data. It's size must be maximum of 64,
+  // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
+  uint8_t secret[32];
+  uint8_t j[SIDH_JINV_BYTESZ];
+  uint8_t c0[SIKE_PUB_BYTESZ];
+  uint8_t temp[SIKE_MSG_BYTESZ];
+  uint8_t shared_nok[SIKE_MSG_BYTESZ];
+  shake256incctx ctx;
+
+  // This is OK as we are only using ephemeral keys in BoringSSL
+  randombytes(shared_nok, SIKE_MSG_BYTESZ);
+
+  // Recover m
+  // Let ciphertext = c0 || c1 - both have fixed sizes
+  // m = F(j-invariant(c0, priv_key)) ^ c1
+  ex_iso_B(priv_key, ciphertext, j);
+
+  shake256(secret, sizeof secret, j, sizeof j);
+
+
+  const uint8_t *c1 = &ciphertext[sizeof(c0)];
+  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+    temp[i] = c1[i] ^ secret[i];
+  }
+
+  shake256_inc_init(&ctx);
+  shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ);
+  shake256_inc_absorb(&ctx, pub_key, SIKE_PUB_BYTESZ);
+  shake256_inc_finalize(&ctx);
+  shake256_inc_squeeze(secret, 32, &ctx);
+  shake256_inc_ctx_release(&ctx);
+
+  // Recover c0 = public key A
+  gen_iso_A(secret, c0);
+  crypto_word_t ok = ct_uint_eq(
+    ct_mem_eq(c0, ciphertext, SIKE_PUB_BYTESZ), 1);
+  for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
+    temp[i] = ct_select_8(ok, temp[i], shared_nok[i]);
+  }
+
+  shake256_inc_init(&ctx);
+  shake256_inc_absorb(&ctx, temp, SIKE_MSG_BYTESZ);
+  shake256_inc_absorb(&ctx, ciphertext, SIKE_CT_BYTESZ);
+  shake256_inc_finalize(&ctx);
+  shake256_inc_squeeze(secret, 32, &ctx);
+  shake256_inc_ctx_release(&ctx);
+
+  // Generate shared secret out_shared_key = SHA256(m||ciphertext)
+  memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
+}
diff --git a/src/kem/sike/p434/utils.h b/src/kem/sike/p434/utils.h
new file mode 100644
index 00000000..e483d00f
--- /dev/null
+++ b/src/kem/sike/p434/utils.h
@@ -0,0 +1,214 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny cryptography library
+*
+* Abstract: internal header file for P434
+*********************************************************************************************/
+
+#ifndef UTILS_H_
+#define UTILS_H_
+
+#include <stddef.h>
+#include <kem/sike/includes/sike/sike.h>
+
+// Conversion macro from number of bits to number of bytes
+#define BITS_TO_BYTES(nbits)      (((nbits)+7)/8)
+
+// Bit size of the field
+#define BITS_FIELD              434
+// Byte size of the field
+#define FIELD_BYTESZ            BITS_TO_BYTES(BITS_FIELD)
+// Number of 64-bit words of a 224-bit element
+#define NBITS_ORDER             224
+#define NWORDS64_ORDER          ((NBITS_ORDER+63)/64)
+// Number of elements in Alice's strategy
+#define A_max                   108
+// Number of elements in Bob's strategy
+#define B_max                   137
+// Word size in bits (parenthesized so it composes safely inside larger expressions)
+#define RADIX                   (sizeof(crypto_word_t)*8)
+// Byte size of a limb
+#define LSZ                     sizeof(crypto_word_t)
+
+#if defined(CPU_64_BIT)
+    typedef uint64_t crypto_word_t;
+    // Number of words of a 434-bit field element
+    #define NWORDS_FIELD    7
+    // Number of "0" digits in the least significant part of p434 + 1
+    #define ZERO_WORDS 3
+    // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
+    #define U64_TO_WORDS(x) UINT64_C(x)
+#else
+    typedef uint32_t crypto_word_t;
+    // Number of words of a 434-bit field element
+    #define NWORDS_FIELD    14
+    // Number of "0" digits in the least significant part of p434 + 1
+    #define ZERO_WORDS 6
+    // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal.
+    #define U64_TO_WORDS(x) \
+        (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32)
+#endif
+
+// Extended datatype support
+#if !defined(HAS_UINT128)
+    typedef uint64_t uint128_t[2];
+#endif
+
+// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
+// Digit multiplication
+#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo));
+
+// Converts a mask to a bit: yields 1 if |x| == 0xff..ff, and 0 if |x| == 0
+#define M2B(x) ((x)>>(RADIX-1))
+
+// Digit addition with carry
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                   \
+do {                                                                        \
+  crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn);             \
+  (sumOut) = (addend2) + tempReg;                                           \
+  (carryOut) = M2B(ct_uint_lt(tempReg, (crypto_word_t)(carryIn)) |  \
+                   ct_uint_lt((sumOut), tempReg));                  \
+} while(0)
+
+// Digit subtraction with borrow
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)           \
+do {                                                                            \
+    crypto_word_t tempReg = (minuend) - (subtrahend);                           \
+    crypto_word_t borrowReg = M2B(ct_uint_lt((minuend), (subtrahend))); \
+    borrowReg |= ((borrowIn) & ct_uint_eq(tempReg, 0));               \
+    (differenceOut) = tempReg - (crypto_word_t)(borrowIn);                      \
+    (borrowOut) = borrowReg;                                                    \
+} while(0)
+
+/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly,
+   which violates C11 as described in 6.7.9, 21 (similarly C99, 6.7.8).
+   Defines below are used to work around the bug, and provide a way
+   to initialize f2elm_t and point_proj_t structs.
+   Bug has been fixed in GCC6 (debian stretch).
+*/
+#define F2ELM_INIT {{ {0}, {0} }}
+#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }}
+
+// Datatype for representing 434-bit field elements (448-bit max.)
+// Elements over GF(p434) are encoded in 55 octets in little endian format
+// (i.e., the least significant octet is located in the lowest memory address).
+typedef crypto_word_t felm_t[NWORDS_FIELD];
+
+// An element in F_{p^2} is composed of two coefficients from F_p, i.e.
+// Fp2 element = c0 + c1*i in F_{p^2}
+// Datatype for representing double-precision 2x434-bit field elements (448-bit max.)
+// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are
+// encoded as {a, b}, with a in the lowest memory portion.
+typedef struct {
+    felm_t c0;
+    felm_t c1;
+} fp2;
+
+// Our F_{p^2} element type is a one-element array of the struct, so it decays to a pointer when passed.
+typedef fp2 f2elm_t[1];
+
+// Datatype for representing double-precision 2x434-bit
+// field elements in contiguous memory.
+typedef crypto_word_t dfelm_t[2*NWORDS_FIELD];
+
+// Constants used during SIKE computation.
+struct params_t {
+    // Stores a prime
+    const crypto_word_t prime[NWORDS_FIELD];
+    // Stores prime + 1
+    const crypto_word_t prime_p1[NWORDS_FIELD];
+    // Stores prime * 2
+    const crypto_word_t prime_x2[NWORDS_FIELD];
+    // Alice's generator values {XPA0 + XPA1*i, XQA0 + XQA1*i, XRA0 + XRA1*i}
+    // in GF(prime^2), expressed in Montgomery representation
+    const crypto_word_t A_gen[6*NWORDS_FIELD];
+    // Bob's generator values {XPB0 + XPB1*i, XQB0 + XQB1*i, XRB0 + XRB1*i}
+    // in GF(prime^2), expressed in Montgomery representation
+    const crypto_word_t B_gen[6*NWORDS_FIELD];
+    // Montgomery constant mont_R2 = (2^448)^2 mod prime
+    const crypto_word_t mont_R2[NWORDS_FIELD];
+    // Value 'one' in Montgomery representation
+    const crypto_word_t mont_one[NWORDS_FIELD];
+    // Value '6' in Montgomery representation
+    const crypto_word_t mont_six[NWORDS_FIELD];
+    // Fixed parameters for isogeny tree computation
+    const unsigned int A_strat[A_max-1];
+    const unsigned int B_strat[B_max-1];
+};
+
+// Point representation in projective XZ Montgomery coordinates.
+typedef struct {
+    f2elm_t X;
+    f2elm_t Z;
+} point_proj;
+typedef point_proj point_proj_t[1];
+
+// Checks whether two words are equal, without branching. Returns 1 if x == y,
+// otherwise 0.
+static inline crypto_word_t ct_uint_eq(crypto_word_t x, crypto_word_t y)
+{
+    // t == 0 if and only if x == y
+    crypto_word_t t = x ^ y;
+    // if t != 0 then (t >> 1) < t, so the subtraction wraps around and sets the MSB
+    t = (t >> 1) - t;
+    // invert and shift the MSB down: 1 in case x == y, otherwise 0
+    return ((~t) >> (RADIX-1));
+}
+// Constant time select, keyed on the least significant bit of |flag|.
+// if flag == 1 (out = in1)
+// if flag == 0 (out = in2)
+// for any other value only bit 0 of |flag| is considered
+static inline uint8_t ct_select_8(uint8_t flag, uint8_t in1, uint8_t in2) {
+    uint8_t mask = (uint8_t)(0U - (flag & 1U)); // 0xFF if bit 0 set, else 0x00; no signed shift involved
+    return (in1&mask) | (in2&(~mask));
+}
+
+// Constant time memcmp. Returns 1 if the n bytes at p and q are equal, otherwise 0.
+static inline int ct_mem_eq(const void *p, const void *q, size_t n)
+{
+  const uint8_t *pp = (const uint8_t*)p, *qq = (const uint8_t*)q;
+  uint8_t a = 0; // OR of all byte differences; stays 0 only if every byte matches
+
+  while (n--) a |= *pp++ ^ *qq++;
+  return (int)(ct_uint_eq(a, 0));
+}
+
+static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
+  return 0u - (a >> (sizeof(a) * 8 - 1));
+}
+
+// ct_uint_lt returns 0xff..f if x < y and 0 otherwise (a and b in the proof below stand for x and y).
+static inline crypto_word_t ct_uint_lt(crypto_word_t x, crypto_word_t y)
+{
+  // Consider the two cases of the problem:
+  //   msb(a) == msb(b): a < b iff the MSB of a - b is set.
+  //   msb(a) != msb(b): a < b iff the MSB of b is set.
+  //
+  // If msb(a) == msb(b) then the following evaluates as:
+  //   msb(a^((a^b)|((a-b)^a))) ==
+  //   msb(a^((a-b) ^ a))       ==   (because msb(a^b) == 0)
+  //   msb(a^a^(a-b))           ==   (rearranging)
+  //   msb(a-b)                      (because ∀x. x^x == 0)
+  //
+  // Else, if msb(a) != msb(b) then the following evaluates as:
+  //   msb(a^((a^b)|((a-b)^a))) ==
+  //   msb(a^(𝟙 | ((a-b)^a)))   ==   (because msb(a^b) == 1 and 𝟙
+  //                                  represents a value s.t. msb(𝟙) = 1)
+  //   msb(a^𝟙)                 ==   (because ORing with 1 results in 1)
+  //   msb(b)
+  //
+  //
+  // Here is an SMT-LIB verification of this formula:
+  //
+  // (define-fun lt ((a (_ BitVec 32)) (b (_ BitVec 32))) (_ BitVec 32)
+  //   (bvxor a (bvor (bvxor a b) (bvxor (bvsub a b) a)))
+  // )
+  //
+  // (declare-fun a () (_ BitVec 32))
+  // (declare-fun b () (_ BitVec 32))
+  //
+  // (assert (not (= (= #x00000001 (bvlshr (lt a b) #x0000001f)) (bvult a b))))
+  // (check-sat)
+  // (get-model)
+  return constant_time_msb_w(x^((x^y)|((x-y)^x)));
+}
+#endif // UTILS_H_
-- 
2.39.5


From 51a41a31674d5ab572c7a30e47fcba21468819b2 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Fri, 9 Apr 2021 00:54:14 +0100
Subject: [PATCH 04/12] update sike

---
 src/kem/sike/includes/sike/sike.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/kem/sike/includes/sike/sike.h b/src/kem/sike/includes/sike/sike.h
index 09d1e580..e71cde57 100644
--- a/src/kem/sike/includes/sike/sike.h
+++ b/src/kem/sike/includes/sike/sike.h
@@ -3,6 +3,7 @@
 
 #include <stdint.h>
 #include <string.h>
+#include "randombytes.h"
 
 /* SIKE
  *
@@ -55,7 +56,8 @@
 #define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME         "SIKE/p434"
 
 static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
-	SIKE_keypair(sk, pk);
+	randombytes(sk, SIKE_MSG_BYTESZ);
+	SIKE_keypair(sk+SIKE_MSG_BYTESZ, pk);
 	memcpy(&sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], pk, SIKE_PUB_BYTESZ);
 	return 1;
 }
-- 
2.39.5


From a2a2b08a84cb4ec0c231bb5b3d2b9a0e7d64cc6d Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Fri, 9 Apr 2021 07:10:50 +0100
Subject: [PATCH 05/12] add cmake

---
 src/kem/sike/CMakeLists.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 src/kem/sike/CMakeLists.txt

diff --git a/src/kem/sike/CMakeLists.txt b/src/kem/sike/CMakeLists.txt
new file mode 100644
index 00000000..0a55522e
--- /dev/null
+++ b/src/kem/sike/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(
+  SRC_CLEAN_SIKE_P434
+  p434/fpx.c
+  p434/isogeny.c
+  p434/fp_generic.c
+  p434/params.c
+  p434/sike.c)
+
+define_kem_alg(
+  sike_p434_clean
+  PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")
+
-- 
2.39.5


From 8711dcce1ad3909d682948fa29537a372719482d Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Fri, 9 Apr 2021 10:38:06 +0100
Subject: [PATCH 06/12] SIKE/p434 goes thru KATs

---
 README.md                         |  1 +
 src/kem/sike/includes/sike/sike.h | 21 ++++++++++++++-------
 src/kem/sike/p434/sike.c          | 26 +++++++++++---------------
 test/katrunner/Cargo.toml         |  2 +-
 test/katrunner/src/main.rs        |  7 ++++---
 5 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 75d59645..696e00b5 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ Users shouldn't expect any level of security provided by this code. The library
 | Falcon                   | 2          |    |
 | Rainbow                  | 3          |    |
 | SPHINCS+ SHA256/SHAKE256 | 3          |  x |
+| SIKE/p434                | 3          |  x |
 
 ## Building
 
diff --git a/src/kem/sike/includes/sike/sike.h b/src/kem/sike/includes/sike/sike.h
index e71cde57..ca0d03d7 100644
--- a/src/kem/sike/includes/sike/sike.h
+++ b/src/kem/sike/includes/sike/sike.h
@@ -49,26 +49,33 @@
     const uint8_t priv_key[SIKE_PRV_BYTESZ]);
 
 // boilerplate needed for integration
-#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES  SIKE_PRV_BYTESZ
+#define PQCLEAN_SIKE434_CLEAN_CRYPTO_SECRETKEYBYTES  (SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ+SIKE_PUB_BYTESZ) /* sk buffer = msg || private key || public key, as written by crypto_kem_keypair */
 #define PQCLEAN_SIKE434_CLEAN_CRYPTO_PUBLICKEYBYTES  SIKE_PUB_BYTESZ
 #define PQCLEAN_SIKE434_CLEAN_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
 #define PQCLEAN_SIKE434_CLEAN_CRYPTO_BYTES           SIKE_SS_BYTESZ
 #define PQCLEAN_SIKE434_CLEAN_CRYPTO_ALGNAME         "SIKE/p434"
 
+#define PQCLEAN_SIKE434_AVX2_CRYPTO_SECRETKEYBYTES  (SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ+SIKE_PUB_BYTESZ) /* sk buffer = msg || private key || public key; NOTE(review): assumes the same layout as the CLEAN variant - confirm */
+#define PQCLEAN_SIKE434_AVX2_CRYPTO_PUBLICKEYBYTES  SIKE_PUB_BYTESZ
+#define PQCLEAN_SIKE434_AVX2_CRYPTO_CIPHERTEXTBYTES SIKE_CT_BYTESZ
+#define PQCLEAN_SIKE434_AVX2_CRYPTO_BYTES           SIKE_SS_BYTESZ
+#define PQCLEAN_SIKE434_AVX2_CRYPTO_ALGNAME         "SIKE/p434"
+
 static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
-	randombytes(sk, SIKE_MSG_BYTESZ);
-	SIKE_keypair(sk+SIKE_MSG_BYTESZ, pk);
-	memcpy(&sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], pk, SIKE_PUB_BYTESZ);
-	return 1;
+	SIKE_keypair(sk, pk);
+	// KATs require the public key to be concatenated after private key
+	// OZAPTF: maybe change KAT tester
+	memcpy(&sk[SIKE_MSG_BYTESZ+SIKE_PRV_BYTESZ], pk, SIKE_PUB_BYTESZ);
+	return 0;
 }
 static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
 	SIKE_encaps(ss,ct,pk);
-	return 1;
+	return 0;
 }
 
 static inline int PQCLEAN_SIKE434_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
 	SIKE_decaps(ss, ct, &sk[SIKE_PRV_BYTESZ+SIKE_MSG_BYTESZ], sk);
-	return 1;
+	return 0;
 }
 
 
diff --git a/src/kem/sike/p434/sike.c b/src/kem/sike/p434/sike.c
index f52fe5c4..83a9dc1d 100644
--- a/src/kem/sike/p434/sike.c
+++ b/src/kem/sike/p434/sike.c
@@ -411,10 +411,10 @@ int SIKE_keypair(uint8_t out_priv[SIKE_PRV_BYTESZ],
                  uint8_t out_pub[SIKE_PUB_BYTESZ]) {
   // Calculate private key for Alice. Needs to be in range [0, 2^0xFA - 1] and <
   // 253 bits
-  randombytes(out_priv, SIKE_PRV_BYTESZ);
-  out_priv[31] = (out_priv[31] | 0x01) & 0x03;
-
-  gen_iso_B(out_priv, out_pub);
+  randombytes(out_priv, SIKE_MSG_BYTESZ);
+  randombytes(&out_priv[SIKE_MSG_BYTESZ], SIKE_PRV_BYTESZ);
+  out_priv[SIKE_MSG_BYTESZ+28-1] = (out_priv[SIKE_MSG_BYTESZ+28-1] & 0x01);
+  gen_iso_B(&out_priv[SIKE_MSG_BYTESZ], out_pub);
   return 1;
 }
 
@@ -430,7 +430,7 @@ void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
   shake256incctx ctx;
 
   // Generate secret key for A
-  // secret key A = SHA256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ
+  // secret key A = SHAKE256({0,1}^n || pub_key)) mod SIDH_PRV_A_BITSZ
   randombytes(temp, SIKE_MSG_BYTESZ);
 
   shake256_inc_init(&ctx);
@@ -444,7 +444,7 @@ void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
   gen_iso_A(secret, out_ciphertext);
 
   // Generate c1:
-  //  h = SHA256(j-invariant)
+  //  h = SHAKE256(j-invariant)
   // c1 = h ^ m
   ex_iso_A(secret, pub_key, j);
   shake256(secret, sizeof secret, j, sizeof j);
@@ -461,14 +461,14 @@ void SIKE_encaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
   shake256_inc_finalize(&ctx);
   shake256_inc_squeeze(secret, 32, &ctx);
   shake256_inc_ctx_release(&ctx);
-  // Generate shared secret out_shared_key = SHA256(m||out_ciphertext)
+  // Generate shared secret out_shared_key = SHAKE256(m||out_ciphertext)
   memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
 }
 
 void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
                  const uint8_t ciphertext[SIKE_CT_BYTESZ],
                  const uint8_t pub_key[SIKE_PUB_BYTESZ],
-                 const uint8_t priv_key[SIKE_PRV_BYTESZ]) {
+                 const uint8_t priv_key[SIKE_MSG_BYTESZ + SIKE_PRV_BYTESZ]) {
   // Secret buffer is reused by the function to store some ephemeral
   // secret data. It's size must be maximum of 64,
   // SIKE_MSG_BYTESZ and SIDH_PRV_A_BITSZ in bytes.
@@ -476,16 +476,12 @@ void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
   uint8_t j[SIDH_JINV_BYTESZ];
   uint8_t c0[SIKE_PUB_BYTESZ];
   uint8_t temp[SIKE_MSG_BYTESZ];
-  uint8_t shared_nok[SIKE_MSG_BYTESZ];
   shake256incctx ctx;
 
-  // This is OK as we are only using ephemeral keys in BoringSSL
-  randombytes(shared_nok, SIKE_MSG_BYTESZ);
-
   // Recover m
   // Let ciphertext = c0 || c1 - both have fixed sizes
   // m = F(j-invariant(c0, priv_key)) ^ c1
-  ex_iso_B(priv_key, ciphertext, j);
+  ex_iso_B(&priv_key[SIKE_MSG_BYTESZ], ciphertext, j);
 
   shake256(secret, sizeof secret, j, sizeof j);
 
@@ -507,7 +503,7 @@ void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
   crypto_word_t ok = ct_uint_eq(
     ct_mem_eq(c0, ciphertext, SIKE_PUB_BYTESZ), 1);
   for (size_t i = 0; i < SIKE_MSG_BYTESZ; i++) {
-    temp[i] = ct_select_8(ok, temp[i], shared_nok[i]);
+    temp[i] = ct_select_8(ok, temp[i], priv_key[i]);
   }
 
   shake256_inc_init(&ctx);
@@ -517,6 +513,6 @@ void SIKE_decaps(uint8_t out_shared_key[SIKE_SS_BYTESZ],
   shake256_inc_squeeze(secret, 32, &ctx);
   shake256_inc_ctx_release(&ctx);
 
-  // Generate shared secret out_shared_key = SHA256(m||ciphertext)
+  // Generate shared secret out_shared_key = SHAKE256(m||ciphertext)
   memcpy(out_shared_key, secret, SIKE_SS_BYTESZ);
 }
diff --git a/test/katrunner/Cargo.toml b/test/katrunner/Cargo.toml
index c5adb4dc..fd07d6f3 100644
--- a/test/katrunner/Cargo.toml
+++ b/test/katrunner/Cargo.toml
@@ -11,4 +11,4 @@ hex = "0.4.2"
 threadpool = "1.8.1"
 rust-crypto = "^0.2"
 lazy_static = "1.4.0"
-aes_ctr_drbg = "0.0.2"
\ No newline at end of file
+aes_ctr_drbg = "0.0.2"
diff --git a/test/katrunner/src/main.rs b/test/katrunner/src/main.rs
index f6ee752c..0da5c3e5 100644
--- a/test/katrunner/src/main.rs
+++ b/test/katrunner/src/main.rs
@@ -130,9 +130,10 @@ fn test_kem_vector(el: &TestVector) {
         // Check keygen
         pk.resize(el.kem.pk.len(), 0);
         sk.resize(el.kem.sk.len(), 0);
-            assert_eq!(
-                pqc_keygen(p, pk.as_mut_ptr(), sk.as_mut_ptr()),
-            true);
+        assert_eq!(
+            pqc_keygen(p, pk.as_mut_ptr(), sk.as_mut_ptr()),
+        true);
+
         assert_eq!(sk, el.kem.sk);
         assert_eq!(pk, el.kem.pk);
 
-- 
2.39.5


From c18ca419a8241bf2b895f135686409474f87766a Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Wed, 14 Apr 2021 08:04:14 +0100
Subject: [PATCH 07/12] SIKE: enable optimized version

---
 .gitmodules                    |  3 +++
 3rd/cpu_features               |  1 +
 CMakeLists.txt                 |  4 ++++
 src/capi/pqapi.c               |  7 ++++++
 src/capi/schemes.h             |  2 +-
 src/kem/sike/CMakeLists.txt    |  5 +++--
 src/kem/sike/p434/fp-x86_64.S  | 39 ++++++++++++++--------------------
 src/kem/sike/p434/fp_generic.c | 38 +++++++++++++++++++++++++--------
 src/kem/sike/p434/fp_glue.c    |  4 ++++
 9 files changed, 68 insertions(+), 35 deletions(-)
 create mode 160000 3rd/cpu_features
 create mode 100644 src/kem/sike/p434/fp_glue.c

diff --git a/.gitmodules b/.gitmodules
index 84a57e26..b85836eb 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
 [submodule "3rd/gbench"]
 	path = 3rd/gbench
 	url = https://github.com/henrydcase/benchmark.git
+[submodule "3rd/cpu_features"]
+	path = 3rd/cpu_features
+	url = https://github.com/google/cpu_features.git
diff --git a/3rd/cpu_features b/3rd/cpu_features
new file mode 160000
index 00000000..3e8243b7
--- /dev/null
+++ b/3rd/cpu_features
@@ -0,0 +1 @@
+Subproject commit 3e8243b7d9951c078259c3186c039a6e8f036055
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 313200b3..f6961a8c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,6 +34,7 @@ else()
 endif()
 
 add_subdirectory(3rd/gtest)
+add_subdirectory(3rd/cpu_features)
 
 # Arch settings
 
@@ -82,6 +83,7 @@ include_directories(
   public
   src/common/
   src
+  3rd/cpu_features/include
 )
 
 set_property(GLOBAL PROPERTY obj_libs "")
@@ -243,12 +245,14 @@ get_property(OBJ_LIBS GLOBAL PROPERTY obj_libs)
 target_link_libraries(
   pqc
   common
+  cpu_features
   ${OBJ_LIBS}
 )
 
 target_link_libraries(
   pqc_s
   common
+  cpu_features
   ${OBJ_LIBS}
 )
 
diff --git a/src/capi/pqapi.c b/src/capi/pqapi.c
index d00260d3..8a76b40f 100644
--- a/src/capi/pqapi.c
+++ b/src/capi/pqapi.c
@@ -1,6 +1,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <pqc/pqc.h>
+#include <cpuinfo_x86.h>
 
 #include "schemes.h"
 
@@ -126,3 +127,9 @@ bool pqc_sig_verify(const params_t *p,
     const uint8_t *pk) {
     return !((sig_params_t *)p)->verify(sig, siglen, m, mlen, pk);
 }
+
+X86Features CPU_CAPS;  // cached CPU feature flags; fp_generic.c declares this extern and tests .bmi2 to pick the asm path
+void static_initialization(void) __attribute__((constructor));  // constructor attribute: invoked automatically at load time, before main()
+void static_initialization(void) {
+    CPU_CAPS = GetX86Info().features;  // query cpu_features once and keep only the feature bitfield
+}
diff --git a/src/capi/schemes.h b/src/capi/schemes.h
index 60a68893..9ba29c94 100644
--- a/src/capi/schemes.h
+++ b/src/capi/schemes.h
@@ -115,4 +115,4 @@
 #include "kem/hqc/hqc-rmrs-128/avx2/api.h"
 #include "kem/hqc/hqc-rmrs-192/avx2/api.h"
 #include "kem/hqc/hqc-rmrs-256/avx2/api.h"
-#include "kem/sike/includes/sike/sike.h"
\ No newline at end of file
+#include "kem/sike/includes/sike/sike.h"
diff --git a/src/kem/sike/CMakeLists.txt b/src/kem/sike/CMakeLists.txt
index 0a55522e..8eb116cc 100644
--- a/src/kem/sike/CMakeLists.txt
+++ b/src/kem/sike/CMakeLists.txt
@@ -1,12 +1,13 @@
 set(
   SRC_CLEAN_SIKE_P434
   p434/fpx.c
-  p434/isogeny.c
   p434/fp_generic.c
+  p434/fp_glue.c
+  p434/fp-x86_64.S
+  p434/isogeny.c
   p434/params.c
   p434/sike.c)
 
 define_kem_alg(
   sike_p434_clean
   PQC_SIKEP434_CLEAN "${SRC_CLEAN_SIKE_P434}" "${CMAKE_CURRENT_SOURCE_DIR}")
-
diff --git a/src/kem/sike/p434/fp-x86_64.S b/src/kem/sike/p434/fp-x86_64.S
index f2f32392..e6f30b27 100644
--- a/src/kem/sike/p434/fp-x86_64.S
+++ b/src/kem/sike/p434/fp-x86_64.S
@@ -15,10 +15,10 @@
 .quad	0x6CFC5FD681C52056
 .quad	0x0002341F27177344
 
-.globl	sike_fpadd
-.hidden sike_fpadd
-.type	sike_fpadd,@function
-sike_fpadd:
+.globl	sike_fpadd_asm
+.hidden sike_fpadd_asm
+.type	sike_fpadd_asm,@function
+sike_fpadd_asm:
 .cfi_startproc
 	pushq	%r12
 .cfi_adjust_cfa_offset	8
@@ -107,14 +107,7 @@ sike_fpadd:
 .hidden sike_cswap_asm
 .type	sike_cswap_asm,@function
 sike_cswap_asm:
-
-
 	movq	%rdx,%xmm3
-
-
-
-
-
 	pshufd	$68,%xmm3,%xmm3
 
 	movdqu	0(%rdi),%xmm0
@@ -258,10 +251,10 @@ sike_cswap_asm:
 	movdqu	%xmm1,208(%rsi)
 
 	.byte	0xf3,0xc3
-.globl	sike_fpsub
-.hidden sike_fpsub
-.type	sike_fpsub,@function
-sike_fpsub:
+.globl	sike_fpsub_asm
+.hidden sike_fpsub_asm
+.type	sike_fpsub_asm,@function
+sike_fpsub_asm:
 .cfi_startproc
 	pushq	%r12
 .cfi_adjust_cfa_offset	8
@@ -508,10 +501,10 @@ sike_mpdblsubx2_asm:
 	.byte	0xf3,0xc3
 .cfi_endproc
 
-.globl	sike_fprdc
-.hidden sike_fprdc
-.type	sike_fprdc,@function
-sike_fprdc:
+.globl	sike_fprdc_asm
+.hidden sike_fprdc_asm
+.type	sike_fprdc_asm,@function
+sike_fprdc_asm:
 .cfi_startproc
 	pushq	%r12
 .cfi_adjust_cfa_offset	8
@@ -723,10 +716,10 @@ sike_fprdc:
 .cfi_adjust_cfa_offset	-8
 	.byte	0xf3,0xc3
 .cfi_endproc
-.globl	sike_mpmul
-.hidden sike_mpmul
-.type	sike_mpmul,@function
-sike_mpmul:
+.globl	sike_mpmul_asm
+.hidden sike_mpmul_asm
+.type	sike_mpmul_asm,@function
+sike_mpmul_asm:
 .cfi_startproc
 	pushq	%r12
 .cfi_adjust_cfa_offset	8
diff --git a/src/kem/sike/p434/fp_generic.c b/src/kem/sike/p434/fp_generic.c
index 02e851cf..7fa75d1f 100644
--- a/src/kem/sike/p434/fp_generic.c
+++ b/src/kem/sike/p434/fp_generic.c
@@ -5,12 +5,16 @@
 *********************************************************************************************/
 #include "utils.h"
 #include "fpx.h"
+#include <cpuinfo_x86.h>
+
+extern X86Features CPU_CAPS;
 
 // Global constants
 extern const struct params_t params;
 
+// Digit multiplication, digit * digit -> 2-digit result
 static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c)
-{ // Digit multiplication, digit * digit -> 2-digit result
+{
     crypto_word_t al, ah, bl, bh, temp;
     crypto_word_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
     crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4);
@@ -43,10 +47,11 @@ static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_w
     c[1] ^= (ahbh & mask_high) + carry;             // C11
 }
 
+// Modular addition, c = a+b mod p434.
+// Inputs: a, b in [0, 2*p434-1]
+// Output: c in [0, 2*p434-1]
 void sike_fpadd(const felm_t a, const felm_t b, felm_t c)
-{ // Modular addition, c = a+b mod p434.
-  // Inputs: a, b in [0, 2*p434-1]
-  // Output: c in [0, 2*p434-1]
+{
     unsigned int i, carry = 0;
     crypto_word_t mask;
 
@@ -84,12 +89,20 @@ void sike_fpsub(const felm_t a, const felm_t b, felm_t c)
     }
 }
 
+// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
+void sike_mpmul_asm(const felm_t a, const felm_t b, dfelm_t c);
 void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
-{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD.
+{
     unsigned int i, j;
     crypto_word_t t = 0, u = 0, v = 0, UV[2];
     unsigned int carry = 0;
 
+    // TODO: it actually needs BMI2 & ADOX. cpu_features needs to be updated
+    if (CPU_CAPS.bmi2) {
+        sike_mpmul_asm(a,b,c);
+        return;
+    }
+
     for (i = 0; i < NWORDS_FIELD; i++) {
         for (j = 0; j <= i; j++) {
             MUL(a[j], b[i-j], UV+1, UV[0]);
@@ -118,11 +131,18 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
     c[2*NWORDS_FIELD-1] = v;
 }
 
+// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
+// mc = ma*R^-1 mod p434x2, where R = 2^448.
+// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
+// ma is assumed to be in Montgomery representation.
+void sike_fprdc_asm(const felm_t ma, felm_t mc);
 void sike_fprdc(const felm_t ma, felm_t mc)
-{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
-  // mc = ma*R^-1 mod p434x2, where R = 2^448.
-  // If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
-  // ma is assumed to be in Montgomery representation.
+{
+    if (CPU_CAPS.bmi2) {
+        sike_fprdc_asm(ma, mc);
+        return;
+    }
+
     unsigned int i, j, carry, count = ZERO_WORDS;
     crypto_word_t UV[2], t = 0, u = 0, v = 0;
 
diff --git a/src/kem/sike/p434/fp_glue.c b/src/kem/sike/p434/fp_glue.c
new file mode 100644
index 00000000..0495a3a4
--- /dev/null
+++ b/src/kem/sike/p434/fp_glue.c
@@ -0,0 +1,4 @@
+#include "fpx.h"
+#include "utils.h"
+
+void sike_mpmul_asm_X(const felm_t a, const felm_t b, dfelm_t c); // NOTE(review): no definition or caller of this name is visible (the asm exports sike_mpmul_asm) - looks like a placeholder, confirm
\ No newline at end of file
-- 
2.39.5


From ac4f2b7918c832b3241f633c9f7eb0325a8aeb6f Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Sun, 18 Apr 2021 23:40:08 +0100
Subject: [PATCH 08/12] change path to cpu_features submodule

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index b85836eb..143e62d9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -9,4 +9,4 @@
 	url = https://github.com/henrydcase/benchmark.git
 [submodule "3rd/cpu_features"]
 	path = 3rd/cpu_features
-	url = https://github.com/google/cpu_features.git
+	url = https://github.com/henrydcase/cpu_features.git
-- 
2.39.5


From 3683dcfa641a2b971d96fb00b4022d4bf5408752 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Sun, 18 Apr 2021 23:44:53 +0100
Subject: [PATCH 09/12] update cpu_features submodule

---
 3rd/cpu_features | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/3rd/cpu_features b/3rd/cpu_features
index 3e8243b7..2b07c2ab 160000
--- a/3rd/cpu_features
+++ b/3rd/cpu_features
@@ -1 +1 @@
-Subproject commit 3e8243b7d9951c078259c3186c039a6e8f036055
+Subproject commit 2b07c2ab7df71d0b6c19afb93f68a808b412a7ff
-- 
2.39.5


From 6b9aa0e10b2340ce012494738ae5aa6db1fd3371 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Sun, 18 Apr 2021 23:50:59 +0100
Subject: [PATCH 10/12] check if adox available

---
 src/kem/sike/p434/fp_generic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/kem/sike/p434/fp_generic.c b/src/kem/sike/p434/fp_generic.c
index 7fa75d1f..8634b850 100644
--- a/src/kem/sike/p434/fp_generic.c
+++ b/src/kem/sike/p434/fp_generic.c
@@ -97,8 +97,7 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
     crypto_word_t t = 0, u = 0, v = 0, UV[2];
     unsigned int carry = 0;
 
-    // TODO: it actually needs BMI2 & ADOX. cpu_features needs to be updated
-    if (CPU_CAPS.bmi2) {
+    if (CPU_CAPS.bmi2 && CPU_CAPS.adx) {
         sike_mpmul_asm(a,b,c);
         return;
     }
@@ -138,7 +137,7 @@ void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c)
 void sike_fprdc_asm(const felm_t ma, felm_t mc);
 void sike_fprdc(const felm_t ma, felm_t mc)
 {
-    if (CPU_CAPS.bmi2) {
+    if (CPU_CAPS.bmi2 && CPU_CAPS.adx) {
         sike_fprdc_asm(ma, mc);
         return;
     }
-- 
2.39.5


From 4dcce2cc7e2ed76441dcc36f25dc741865feb092 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Sun, 18 Apr 2021 23:57:41 +0100
Subject: [PATCH 11/12] use haswell as default arch

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f6961a8c..14d0c09f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -156,7 +156,7 @@ add_subdirectory(src/kem/sike)
 if(${ARCH} STREQUAL "ARCH_x86_64")
 
 set(CMAKE_C_FLAGS
-  "${CMAKE_C_FLAGS} -march=native -mtune=native")
+  "${CMAKE_C_FLAGS} -march=haswell")
 set(SRC_COMMON_AVX2
   src/common/keccak4x/KeccakP-1600-times4-SIMD256.c
 )
-- 
2.39.5


From de3f719a9dcb3326b3186b449b9ec62004328661 Mon Sep 17 00:00:00 2001
From: Kris Kwiatkowski <contact@amongbytes.com>
Date: Sun, 18 Apr 2021 23:59:38 +0100
Subject: [PATCH 12/12] add drone.yml

---
 buid.dbg/.drone.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 buid.dbg/.drone.yml

diff --git a/buid.dbg/.drone.yml b/buid.dbg/.drone.yml
new file mode 100644
index 00000000..41c554ac
--- /dev/null
+++ b/buid.dbg/.drone.yml
@@ -0,0 +1,14 @@
+kind: pipeline
+type: exec
+name: default
+
+steps:
+- name: build
+  commands:
+  - git submodule sync --recursive            # pick up URL changes recorded in .gitmodules
+  - git submodule update --init --recursive   # check out the commits pinned by the superproject (no --remote, so builds are reproducible)
+  - mkdir -p build                            # tolerate a leftover build directory
+  - cd build
+  - cmake ..
+  - make
+  - ./test
-- 
2.39.5