OpenSSL: make final reduction in Montgomery multiplication constant-time.
(The issue was reported by Shay Gueron.) The final reduction in Montgomery multiplication computes: if (X >= m) then X = X - m else X = X. In OpenSSL, this was done by computing T = X - m, doing a constant-time selection of the *addresses* of X and T, and loading from the resulting address. But this is not cache-neutral: which address gets loaded still depends on the secret comparison result. This patch changes the behaviour by loading both X and T into registers and doing a constant-time selection of the *values*. TODO(fork): only some of the fixes from the original patch still apply to the 1.0.2 code.
This commit is contained in:
parent
b36a3156b6
commit
75b833cc81
@ -570,16 +570,15 @@ $sbit=$num;
|
||||
&jge (&label("sub"));
|
||||
|
||||
&sbb ("eax",0); # handle upmost overflow bit
|
||||
&and ($tp,"eax");
|
||||
&not ("eax");
|
||||
&mov ($np,$rp);
|
||||
&and ($np,"eax");
|
||||
&or ($tp,$np); # tp=carry?tp:rp
|
||||
|
||||
&set_label("copy",16); # copy or in-place refresh
|
||||
&mov ("eax",&DWP(0,$tp,$num,4));
|
||||
&mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
|
||||
&mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
|
||||
&mov ("edx",&DWP(0,$tp,$num,4));
|
||||
&mov ($np,&DWP(0,$rp,$num,4));
|
||||
&xor ("edx",$np); # conditional select
|
||||
&and ("edx","eax");
|
||||
&xor ("edx",$np);
|
||||
&mov (&DWP(0,$tp,$num,4),$j) # zap temporary vector
|
||||
&mov (&DWP(0,$rp,$num,4),"edx"); # rp[i]=tp[i]
|
||||
&dec ($num);
|
||||
&jge (&label("copy"));
|
||||
|
||||
|
@ -273,22 +273,21 @@ $code.=<<___;
|
||||
mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
|
||||
mov 8($ap,$i,8),%rax # tp[i+1]
|
||||
lea 1($i),$i # i++
|
||||
dec $j # doesnn't affect CF!
|
||||
dec $j # doesn't affect CF!
|
||||
jnz .Lsub
|
||||
|
||||
sbb \$0,%rax # handle upmost overflow bit
|
||||
xor $i,$i
|
||||
and %rax,$ap
|
||||
not %rax
|
||||
mov $rp,$np
|
||||
and %rax,$np
|
||||
mov $num,$j # j=num
|
||||
or $np,$ap # ap=borrow?tp:rp
|
||||
.align 16
|
||||
.Lcopy: # copy or in-place refresh
|
||||
mov ($ap,$i,8),%rax
|
||||
mov (%rsp,$i,8),$ap
|
||||
mov ($rp,$i,8),$np
|
||||
xor $np,$ap # conditional select:
|
||||
and %rax,$ap # ((ap ^ np) & %rax) ^ np
|
||||
xor $np,$ap # ap = borrow?tp:rp
|
||||
mov $i,(%rsp,$i,8) # zap temporary vector
|
||||
mov %rax,($rp,$i,8) # rp[i]=tp[i]
|
||||
mov $ap,($rp,$i,8) # rp[i]=tp[i]
|
||||
lea 1($i),$i
|
||||
sub \$1,$j
|
||||
jnz .Lcopy
|
||||
@ -643,7 +642,6 @@ my @ri=("%rax","%rdx",$m0,$m1);
|
||||
$code.=<<___;
|
||||
mov 16(%rsp,$num,8),$rp # restore $rp
|
||||
mov 0(%rsp),@ri[0] # tp[0]
|
||||
pxor %xmm0,%xmm0
|
||||
mov 8(%rsp),@ri[1] # tp[1]
|
||||
shr \$2,$num # num/=4
|
||||
lea (%rsp),$ap # borrow ap for tp
|
||||
@ -681,35 +679,36 @@ $code.=<<___;
|
||||
mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
|
||||
|
||||
sbb \$0,@ri[0] # handle upmost overflow bit
|
||||
mov @ri[0],%xmm0
|
||||
punpcklqdq %xmm0,%xmm0 # extend mask to 128 bits
|
||||
mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
|
||||
xor $i,$i # i=0
|
||||
and @ri[0],$ap
|
||||
not @ri[0]
|
||||
mov $rp,$np
|
||||
and @ri[0],$np
|
||||
lea -1($num),$j
|
||||
or $np,$ap # ap=borrow?tp:rp
|
||||
|
||||
movdqu ($ap),%xmm1
|
||||
movdqa %xmm0,(%rsp)
|
||||
movdqu %xmm1,($rp)
|
||||
mov $num,$j
|
||||
pxor %xmm5,%xmm5
|
||||
jmp .Lcopy4x
|
||||
.align 16
|
||||
.Lcopy4x: # copy or in-place refresh
|
||||
movdqu 16($ap,$i),%xmm2
|
||||
movdqu 32($ap,$i),%xmm1
|
||||
movdqa %xmm0,16(%rsp,$i)
|
||||
movdqu %xmm2,16($rp,$i)
|
||||
movdqa %xmm0,32(%rsp,$i)
|
||||
movdqu %xmm1,32($rp,$i)
|
||||
movdqu (%rsp,$i),%xmm2
|
||||
movdqu 16(%rsp,$i),%xmm4
|
||||
movdqu ($rp,$i),%xmm1
|
||||
movdqu 16($rp,$i),%xmm3
|
||||
pxor %xmm1,%xmm2 # conditional select
|
||||
pxor %xmm3,%xmm4
|
||||
pand %xmm0,%xmm2
|
||||
pand %xmm0,%xmm4
|
||||
pxor %xmm1,%xmm2
|
||||
pxor %xmm3,%xmm4
|
||||
movdqu %xmm2,($rp,$i)
|
||||
movdqu %xmm4,16($rp,$i)
|
||||
movdqa %xmm5,(%rsp,$i) # zap temporary vectors
|
||||
movdqa %xmm5,16(%rsp,$i)
|
||||
|
||||
lea 32($i),$i
|
||||
dec $j
|
||||
jnz .Lcopy4x
|
||||
|
||||
shl \$2,$num
|
||||
movdqu 16($ap,$i),%xmm2
|
||||
movdqa %xmm0,16(%rsp,$i)
|
||||
movdqu %xmm2,16($rp,$i)
|
||||
___
|
||||
}
|
||||
$code.=<<___;
|
||||
|
@ -330,17 +330,16 @@ $code.=<<___;
|
||||
|
||||
sbb \$0,%rax # handle upmost overflow bit
|
||||
xor $i,$i
|
||||
and %rax,$ap
|
||||
not %rax
|
||||
mov $rp,$np
|
||||
and %rax,$np
|
||||
mov $num,$j # j=num
|
||||
or $np,$ap # ap=borrow?tp:rp
|
||||
.align 16
|
||||
.Lcopy: # copy or in-place refresh
|
||||
mov ($ap,$i,8),%rax
|
||||
mov (%rsp,$i,8),$ap
|
||||
mov ($rp,$i,8),$np
|
||||
xor $np,$ap # conditional select:
|
||||
and %rax,$ap # ((ap ^ np) & %rax) ^ np
|
||||
xor $np,$ap # ap = borrow?tp:rp
|
||||
mov $i,(%rsp,$i,8) # zap temporary vector
|
||||
mov %rax,($rp,$i,8) # rp[i]=tp[i]
|
||||
mov $ap,($rp,$i,8) # rp[i]=tp[i]
|
||||
lea 1($i),$i
|
||||
sub \$1,$j
|
||||
jnz .Lcopy
|
||||
|
Loading…
Reference in New Issue
Block a user