ec/asm/p256-x86_64-asm.pl: get corner case logic right.

(Imported from upstream's 64333004a41a9f4aa587b8e5401420fb70d00687.) RT#4284. This case should be impossible to hit because |EC_POINT_add| doesn't use this function and trying to add equal inputs should never occur during a multiplication. Support for this exists because the pattern has been copied from the first 64-bit P-224 and P-256 work that Emilia, Bodo and I did. There it seemed like a reasonable defense-in-depth in case the code changed in the future. Change-Id: I7ff138669c5468b7d7a5153429bec728cb67e338 Reviewed-on: https://boringssl-review.googlesource.com/7246 Reviewed-by: David Benjamin <davidben@google.com>
2016-03-01 08:23:53 -08:00 · 2016-03-01 08:23:53 -08:00 · 060bd590ce
commit 060bd590ce
parent 7aea80f576
1 changed files with 10 additions and 1 deletions
--- a/crypto/ec/asm/p256-x86_64-asm.pl
+++ b/crypto/ec/asm/p256-x86_64-asm.pl
@@ -1729,6 +1729,7 @@ $code.=<<___;
 	push	%r15
 	sub	\$32*5+8, %rsp

+.Lpoint_double_shortcut$x:
 	movdqu	0x00($a_ptr), %xmm0		# copy	*(P256_POINT *)$a_ptr.x
 	mov	$a_ptr, $b_ptr			# backup copy
 	movdqu	0x10($a_ptr), %xmm1
@@ -2019,6 +2020,7 @@ $code.=<<___;
 	 mov	0x40+8*1($b_ptr), $acc6
 	 mov	0x40+8*2($b_ptr), $acc7
 	 mov	0x40+8*3($b_ptr), $acc0
+	movq	$b_ptr, %xmm1

 	lea	0x40-$bias($b_ptr), $a_ptr
 	lea	$Z1sqr(%rsp), $r_ptr		# Z1^2
@@ -2074,7 +2076,7 @@ $code.=<<___;
 	test	$acc0, $acc0
 	jnz	.Ladd_proceed$x			# (in1infty || in2infty)?
 	test	$acc1, $acc1
-	jz	.Ladd_proceed$x			# is_equal(S1,S2)?
+	jz	.Ladd_double$x			# is_equal(S1,S2)?

 	movq	%xmm0, $r_ptr			# restore $r_ptr
 	pxor	%xmm0, %xmm0
@@ -2086,6 +2088,13 @@ $code.=<<___;
 	movdqu	%xmm0, 0x50($r_ptr)
 	jmp	.Ladd_done$x

+.align	32
+.Ladd_double$x:				# target of the is_equal(S1,S2) branch: fall back to doubling
+	movq	%xmm1, $a_ptr			# restore $a_ptr (value parked in %xmm1 from $b_ptr earlier)
+	movq	%xmm0, $r_ptr			# restore $r_ptr
+	add	\$`32*(18-5)`, %rsp		# difference in frame sizes (presumably 32*18+8 here vs 32*5+8 at the shortcut; confirm)
+	jmp	.Lpoint_double_shortcut$x	# continue after the pushes and stack allocation of the doubling routine
+
 .align	32
 .Ladd_proceed$x:
 	`&load_for_sqr("$R(%rsp)", "$src0")`