boringssl/crypto/fipsmodule/CMakeLists.txt
Nir Drucker 3d450d2844 Speed up ECDSA verify on x86-64.
This commit improves the performance of ECDSA signature verification
(over NIST P-256 curve) for x86 platforms. The speedup is by a factor of 1.15x.
It does so by:
  1) Leveraging the fact that the verification does not need
     to run in constant time. To this end, we implemented:
    a) the function ecp_nistz256_points_mul_public in a similar way to
       the current ecp_nistz256_points_mul function by removing its constant
       time features.
    b) the Binary Extended Euclidean Algorithm (BEEU) in x86 assembly to
       replace the current modular inverse function used for the inversion.
  2) The last step in the ECDSA_verify function compares the (x) affine
     coordinate with the signature (r) value. Converting x from the Jacobian's
     representation to the affine coordinate requires to perform one inversions
     (x_affine = x * z^(-2)). We save this inversion and speed up the computations
     by instead bringing r to x (r_jacobian = r*z^2) which is faster.

The measured results are:
Before (on a Kaby Lake desktop with gcc-5):
Did 26000 ECDSA P-224 signing operations in 1002372us (25938.5 ops/sec)
Did 11000 ECDSA P-224 verify operations in 1043821us (10538.2 ops/sec)
Did 55000 ECDSA P-256 signing operations in 1017560us (54050.9 ops/sec)
Did 17000 ECDSA P-256 verify operations in 1051280us (16170.8 ops/sec)

After (on a Kaby Lake desktop with gcc-5):
Did 27000 ECDSA P-224 signing operations in 1011287us (26698.7 ops/sec)
Did 11640 ECDSA P-224 verify operations in 1076698us (10810.8 ops/sec)
Did 55000 ECDSA P-256 signing operations in 1016880us (54087.0 ops/sec)
Did 20000 ECDSA P-256 verify operations in 1038736us (19254.2 ops/sec)

Before (on a Skylake server platform with gcc-5):
Did 25000 ECDSA P-224 signing operations in 1021651us (24470.2 ops/sec)
Did 10373 ECDSA P-224 verify operations in 1046563us (9911.5 ops/sec)
Did 50000 ECDSA P-256 signing operations in 1002774us (49861.7 ops/sec)
Did 15000 ECDSA P-256 verify operations in 1006471us (14903.6 ops/sec)

After (on a Skylake server platform with gcc-5):
Did 25000 ECDSA P-224 signing operations in 1020958us (24486.8 ops/sec)
Did 10373 ECDSA P-224 verify operations in 1046359us (9913.4 ops/sec)
Did 50000 ECDSA P-256 signing operations in 1003996us (49801.0 ops/sec)
Did 18000 ECDSA P-256 verify operations in 1021604us (17619.4 ops/sec)

Developers and authors:
***************************************************************************
Nir Drucker (1,2), Shay Gueron (1,2)
(1) Amazon Web Services Inc.
(2) University of Haifa, Israel
***************************************************************************

Change-Id: Idd42a7bc40626bce974ea000b61fdb5bad33851c
Reviewed-on: https://boringssl-review.googlesource.com/c/31304
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
2018-11-05 23:48:07 +00:00

206 lines
5.8 KiB
CMake

include_directories(../../include)
if(${ARCH} STREQUAL "x86_64")
set(
BCM_ASM_SOURCES
aesni-gcm-x86_64.${ASM_EXT}
aesni-x86_64.${ASM_EXT}
aes-x86_64.${ASM_EXT}
bsaes-x86_64.${ASM_EXT}
ghash-x86_64.${ASM_EXT}
md5-x86_64.${ASM_EXT}
p256-x86_64-asm.${ASM_EXT}
p256_beeu-x86_64-asm.${ASM_EXT}
rdrand-x86_64.${ASM_EXT}
rsaz-avx2.${ASM_EXT}
sha1-x86_64.${ASM_EXT}
sha256-x86_64.${ASM_EXT}
sha512-x86_64.${ASM_EXT}
vpaes-x86_64.${ASM_EXT}
x86_64-mont5.${ASM_EXT}
x86_64-mont.${ASM_EXT}
)
endif()
if(${ARCH} STREQUAL "x86")
set(
BCM_ASM_SOURCES
aes-586.${ASM_EXT}
aesni-x86.${ASM_EXT}
bn-586.${ASM_EXT}
co-586.${ASM_EXT}
ghash-x86.${ASM_EXT}
md5-586.${ASM_EXT}
sha1-586.${ASM_EXT}
sha256-586.${ASM_EXT}
sha512-586.${ASM_EXT}
vpaes-x86.${ASM_EXT}
x86-mont.${ASM_EXT}
)
endif()
if(${ARCH} STREQUAL "arm")
set(
BCM_ASM_SOURCES
aes-armv4.${ASM_EXT}
aesv8-armx.${ASM_EXT}
armv4-mont.${ASM_EXT}
bsaes-armv7.${ASM_EXT}
ghash-armv4.${ASM_EXT}
ghashv8-armx.${ASM_EXT}
sha1-armv4-large.${ASM_EXT}
sha256-armv4.${ASM_EXT}
sha512-armv4.${ASM_EXT}
)
endif()
if(${ARCH} STREQUAL "aarch64")
set(
BCM_ASM_SOURCES
aesv8-armx.${ASM_EXT}
armv8-mont.${ASM_EXT}
ghashv8-armx.${ASM_EXT}
sha1-armv8.${ASM_EXT}
sha256-armv8.${ASM_EXT}
sha512-armv8.${ASM_EXT}
)
endif()
if(${ARCH} STREQUAL "ppc64le")
set(
BCM_ASM_SOURCES
aesp8-ppc.${ASM_EXT}
ghashp8-ppc.${ASM_EXT}
)
endif()
perlasm(aes-586.${ASM_EXT} aes/asm/aes-586.pl)
perlasm(aes-armv4.${ASM_EXT} aes/asm/aes-armv4.pl)
perlasm(aesni-gcm-x86_64.${ASM_EXT} modes/asm/aesni-gcm-x86_64.pl)
perlasm(aesni-x86_64.${ASM_EXT} aes/asm/aesni-x86_64.pl)
perlasm(aesni-x86.${ASM_EXT} aes/asm/aesni-x86.pl)
perlasm(aesp8-ppc.${ASM_EXT} aes/asm/aesp8-ppc.pl)
perlasm(aesv8-armx.${ASM_EXT} aes/asm/aesv8-armx.pl)
perlasm(aes-x86_64.${ASM_EXT} aes/asm/aes-x86_64.pl)
perlasm(armv4-mont.${ASM_EXT} bn/asm/armv4-mont.pl)
perlasm(armv8-mont.${ASM_EXT} bn/asm/armv8-mont.pl)
perlasm(bn-586.${ASM_EXT} bn/asm/bn-586.pl)
perlasm(bsaes-armv7.${ASM_EXT} aes/asm/bsaes-armv7.pl)
perlasm(bsaes-x86_64.${ASM_EXT} aes/asm/bsaes-x86_64.pl)
perlasm(co-586.${ASM_EXT} bn/asm/co-586.pl)
perlasm(ghash-armv4.${ASM_EXT} modes/asm/ghash-armv4.pl)
perlasm(ghashp8-ppc.${ASM_EXT} modes/asm/ghashp8-ppc.pl)
perlasm(ghashv8-armx.${ASM_EXT} modes/asm/ghashv8-armx.pl)
perlasm(ghash-x86_64.${ASM_EXT} modes/asm/ghash-x86_64.pl)
perlasm(ghash-x86.${ASM_EXT} modes/asm/ghash-x86.pl)
perlasm(md5-586.${ASM_EXT} md5/asm/md5-586.pl)
perlasm(md5-x86_64.${ASM_EXT} md5/asm/md5-x86_64.pl)
perlasm(p256-x86_64-asm.${ASM_EXT} ec/asm/p256-x86_64-asm.pl)
perlasm(p256_beeu-x86_64-asm.${ASM_EXT} ec/asm/p256_beeu-x86_64-asm.pl)
perlasm(rdrand-x86_64.${ASM_EXT} rand/asm/rdrand-x86_64.pl)
perlasm(rsaz-avx2.${ASM_EXT} bn/asm/rsaz-avx2.pl)
perlasm(sha1-586.${ASM_EXT} sha/asm/sha1-586.pl)
perlasm(sha1-armv4-large.${ASM_EXT} sha/asm/sha1-armv4-large.pl)
perlasm(sha1-armv8.${ASM_EXT} sha/asm/sha1-armv8.pl)
perlasm(sha1-x86_64.${ASM_EXT} sha/asm/sha1-x86_64.pl)
perlasm(sha256-586.${ASM_EXT} sha/asm/sha256-586.pl)
perlasm(sha256-armv4.${ASM_EXT} sha/asm/sha256-armv4.pl)
perlasm(sha256-armv8.${ASM_EXT} sha/asm/sha512-armv8.pl)
perlasm(sha256-x86_64.${ASM_EXT} sha/asm/sha512-x86_64.pl)
perlasm(sha512-586.${ASM_EXT} sha/asm/sha512-586.pl)
perlasm(sha512-armv4.${ASM_EXT} sha/asm/sha512-armv4.pl)
perlasm(sha512-armv8.${ASM_EXT} sha/asm/sha512-armv8.pl)
perlasm(sha512-x86_64.${ASM_EXT} sha/asm/sha512-x86_64.pl)
perlasm(vpaes-x86_64.${ASM_EXT} aes/asm/vpaes-x86_64.pl)
perlasm(vpaes-x86.${ASM_EXT} aes/asm/vpaes-x86.pl)
perlasm(x86_64-mont5.${ASM_EXT} bn/asm/x86_64-mont5.pl)
perlasm(x86_64-mont.${ASM_EXT} bn/asm/x86_64-mont.pl)
perlasm(x86-mont.${ASM_EXT} bn/asm/x86-mont.pl)
if(FIPS_DELOCATE)
if(OPENSSL_NO_ASM)
# If OPENSSL_NO_ASM was defined then ASM will not have been enabled, but in
# FIPS mode we have to have it because the module build requires going via
# textual assembly.
enable_language(ASM)
endif()
add_library(
bcm_c_generated_asm
STATIC
bcm.c
)
add_dependencies(bcm_c_generated_asm global_target)
set_target_properties(bcm_c_generated_asm PROPERTIES COMPILE_OPTIONS "-S")
set_target_properties(bcm_c_generated_asm PROPERTIES POSITION_INDEPENDENT_CODE ON)
go_executable(delocate boringssl.googlesource.com/boringssl/util/fipstools/delocate)
add_custom_command(
OUTPUT bcm-delocated.S
COMMAND ./delocate -a $<TARGET_FILE:bcm_c_generated_asm> -o bcm-delocated.S ${BCM_ASM_SOURCES}
DEPENDS bcm_c_generated_asm delocate ${BCM_ASM_SOURCES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
add_library(
bcm_hashunset
STATIC
bcm-delocated.S
)
add_dependencies(bcm_hashunset global_target)
set_target_properties(bcm_hashunset PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(bcm_hashunset PROPERTIES LINKER_LANGUAGE C)
go_executable(inject_hash
boringssl.googlesource.com/boringssl/util/fipstools/inject_hash)
add_custom_command(
OUTPUT bcm.o
COMMAND ./inject_hash -o bcm.o -in-archive $<TARGET_FILE:bcm_hashunset>
DEPENDS bcm_hashunset inject_hash
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
# The outputs of add_custom_command cannot be referenced outside of the
# CMakeLists.txt that defines it. Thus we have to wrap bcm.o in a custom target
# so that crypto can depend on it.
add_custom_target(bcm_o_target DEPENDS bcm.o)
add_library(
fipsmodule
OBJECT
is_fips.c
)
add_dependencies(fipsmodule global_target)
set_target_properties(fipsmodule PROPERTIES LINKER_LANGUAGE C)
else()
add_library(
fipsmodule
OBJECT
bcm.c
is_fips.c
${BCM_ASM_SOURCES}
)
add_dependencies(fipsmodule global_target)
endif()