Some phones have a buggy NEON unit and the Poly1305 NEON code fails on them, even though other NEON code appears to work fine.

This change:
1) Fixes a bug where NEON was assumed even when the code wasn't compiled in NEON mode.
2) Adds a second NEON control bit that can be disabled in order to run NEON code, but not the Poly1305 NEON code.

https://code.google.com/p/chromium/issues/detail?id=341598

Change-Id: Icb121bf8dba47c7a46c7667f676ff7a4bc973625
Reviewed-on: https://boringssl-review.googlesource.com/1351
Reviewed-by: Adam Langley <agl@google.com>

kris/onging/CECPQ3_patch15
@@ -88,13 +88,22 @@ | |||||
#endif | #endif | ||||
#if !__ASSEMBLER__ | #if !__ASSEMBLER__ | ||||
/* OPENSSL_armcap_P contains flags describing the capabilities of the CPU and | /* OPENSSL_armcap_P contains flags describing the capabilities of the CPU and | ||||
* is easy for assembly code to access. For C code, see the functions in | * is easy for assembly code to access. For C code, see the functions in | ||||
* |cpu.h|. */ | * |cpu.h|. */ | ||||
extern unsigned int OPENSSL_armcap_P; | |||||
extern uint32_t OPENSSL_armcap_P; | |||||
#define ARMV7_NEON (1<<0) | |||||
#endif | |||||
/* ARMV7_NEON is true when a NEON unit is present in the current CPU. */ | |||||
#define ARMV7_NEON (1 << 0) | |||||
/* ARMV7_NEON_FUNCTIONAL is true when the NEON unit doesn't contain subtle bugs. | |||||
* The Poly1305 NEON code is known to trigger bugs in the NEON units of some | |||||
* phones. If this bit isn't set then the Poly1305 NEON code won't be used. | |||||
* See https://code.google.com/p/chromium/issues/detail?id=341598. */ | |||||
#define ARMV7_NEON_FUNCTIONAL (1 << 1) | |||||
#endif /* !__ASSEMBLER__ */ | |||||
#endif /* OPENSSL_HEADER_THREAD_H */ | #endif /* OPENSSL_HEADER_THREAD_H */ |
@@ -64,9 +64,9 @@ | |||||
#include "arm_arch.h" | #include "arm_arch.h" | ||||
#if defined(__ARM_NEON__) | #if defined(__ARM_NEON__) | ||||
uint32_t OPENSSL_armcap_P = ARMV7_NEON; | |||||
uint32_t OPENSSL_armcap_P = ARMV7_NEON | ARMV7_NEON_FUNCTIONAL; | |||||
#else | #else | ||||
uint32_t OPENSSL_armcap_P = ARMV7_NEON; | |||||
uint32_t OPENSSL_armcap_P = ARMV7_NEON_FUNCTIONAL; | |||||
#endif | #endif | ||||
char CRYPTO_is_NEON_capable() { | char CRYPTO_is_NEON_capable() { | ||||
@@ -81,4 +81,17 @@ void CRYPTO_set_NEON_capable(char neon_capable) { | |||||
} | } | ||||
} | } | ||||
char CRYPTO_is_NEON_functional() { | |||||
static const uint32_t kWantFlags = ARMV7_NEON | ARMV7_NEON_FUNCTIONAL; | |||||
return (OPENSSL_armcap_P & kWantFlags) == kWantFlags; | |||||
} | |||||
void CRYPTO_set_NEON_functional(char neon_functional) { | |||||
if (neon_functional) { | |||||
OPENSSL_armcap_P |= ARMV7_NEON_FUNCTIONAL; | |||||
} else { | |||||
OPENSSL_armcap_P &= ~ARMV7_NEON_FUNCTIONAL; | |||||
} | |||||
} | |||||
#endif /* defined(OPENSSL_ARM) */ | #endif /* defined(OPENSSL_ARM) */ |
@@ -166,7 +166,7 @@ void CRYPTO_poly1305_init(poly1305_state *statep, const uint8_t key[32]) { | |||||
uint32_t t0, t1, t2, t3; | uint32_t t0, t1, t2, t3; | ||||
#if defined(OPENSSL_ARM) | #if defined(OPENSSL_ARM) | ||||
if (CRYPTO_is_NEON_capable()) { | |||||
if (CRYPTO_is_NEON_functional()) { | |||||
CRYPTO_poly1305_init_neon(statep, key); | CRYPTO_poly1305_init_neon(statep, key); | ||||
return; | return; | ||||
} | } | ||||
@@ -213,7 +213,7 @@ void CRYPTO_poly1305_update(poly1305_state *statep, const uint8_t *in, | |||||
struct poly1305_state_st *state = (struct poly1305_state_st *)statep; | struct poly1305_state_st *state = (struct poly1305_state_st *)statep; | ||||
#if defined(OPENSSL_ARM) | #if defined(OPENSSL_ARM) | ||||
if (CRYPTO_is_NEON_capable()) { | |||||
if (CRYPTO_is_NEON_functional()) { | |||||
CRYPTO_poly1305_update_neon(statep, in, in_len); | CRYPTO_poly1305_update_neon(statep, in, in_len); | ||||
return; | return; | ||||
} | } | ||||
@@ -256,7 +256,7 @@ void CRYPTO_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) { | |||||
uint32_t b, nb; | uint32_t b, nb; | ||||
#if defined(OPENSSL_ARM) | #if defined(OPENSSL_ARM) | ||||
if (CRYPTO_is_NEON_capable()) { | |||||
if (CRYPTO_is_NEON_functional()) { | |||||
CRYPTO_poly1305_finish_neon(statep, mac); | CRYPTO_poly1305_finish_neon(statep, mac); | ||||
return; | return; | ||||
} | } | ||||
@@ -91,8 +91,21 @@ OPENSSL_EXPORT char CRYPTO_is_NEON_capable(); | |||||
/* CRYPTO_set_NEON_capable sets the return value of |CRYPTO_is_NEON_capable|. | /* CRYPTO_set_NEON_capable sets the return value of |CRYPTO_is_NEON_capable|. | ||||
* By default, unless the code was compiled with |-mfpu=neon|, NEON is assumed | * By default, unless the code was compiled with |-mfpu=neon|, NEON is assumed | ||||
* not to be present. It is not autodetected. */ | |||||
* not to be present. It is not autodetected. Calling this with a zero | |||||
* argument also causes |CRYPTO_is_NEON_functional| to return false. */ | |||||
OPENSSL_EXPORT void CRYPTO_set_NEON_capable(char neon_capable); | OPENSSL_EXPORT void CRYPTO_set_NEON_capable(char neon_capable); | ||||
/* CRYPTO_is_NEON_functional returns true if the current CPU has a /working/ | |||||
* NEON unit. Some phones have a NEON unit, but the Poly1305 NEON code causes | |||||
* it to fail. See https://code.google.com/p/chromium/issues/detail?id=341598 */ | |||||
OPENSSL_EXPORT char CRYPTO_is_NEON_functional(); | |||||
/* CRYPTO_set_NEON_functional sets the "NEON functional" flag. For | |||||
* |CRYPTO_is_NEON_functional| to return true, both this flag and the NEON flag | |||||
* must be true. By default NEON is assumed to be functional if the code was | |||||
* compiled with |-mfpu=neon| or if |CRYPTO_set_NEON_capable| has been called | |||||
* with a non-zero argument. */ | |||||
OPENSSL_EXPORT void CRYPTO_set_NEON_functional(char neon_functional); | |||||
#endif /* OPENSSL_ARM */ | #endif /* OPENSSL_ARM */ | ||||