From f21fe46764a9ff5b119023a444e5e41dff2a3fb8 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sun, 12 Jul 2015 12:32:46 -0400 Subject: [PATCH] Simplify the AMD-specific codepath. See TODO comment being removed. Change-Id: I92ce7018f88c24b3e2e61441397fda36b977d3b8 Reviewed-on: https://boringssl-review.googlesource.com/5435 Reviewed-by: Adam Langley --- crypto/cpu-intel.c | 64 +++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 43 deletions(-) diff --git a/crypto/cpu-intel.c b/crypto/cpu-intel.c index 69439994..a50d9582 100644 --- a/crypto/cpu-intel.c +++ b/crypto/cpu-intel.c @@ -63,7 +63,6 @@ #if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64)) -#include #include #include #include @@ -154,13 +153,12 @@ void OPENSSL_cpuid_setup(void) { ecx == 0x444d4163 /* cAMD */; int has_amd_xop = 0; - uint32_t num_amd_extended_ids = 0; if (is_amd) { /* AMD-specific logic. * See http://developer.amd.com/wordpress/media/2012/10/254811.pdf */ OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000); - num_amd_extended_ids = eax; - if (num_amd_extended_ids >= 0x80000001) { + uint32_t num_extended_ids = eax; + if (num_extended_ids >= 0x80000001) { OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001); if (ecx & (1 << 11)) { has_amd_xop = 1; @@ -174,47 +172,27 @@ void OPENSSL_cpuid_setup(void) { extended_features = ebx; } - /* Query the main feature flags and adjust the hyper-threading bit. - * - * TODO(davidben): Can the AMD half of logic be trimmed down? I haven't found - * evidence that any current AMD CPUs share an L1 data cache between threads, - * and the CPUID manual (see section 3) suggests this code will always clear - * HTT on AMD. Only aes-586.pl queries this, so hopefully any future CPUs will - * use better implementations anyway. */ - if (num_amd_extended_ids >= 0x80000008) { - /* See AMD CPUID manual (2.34), page 27. */ - assert(is_amd); - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000008); - uint32_t num_physical_cores = 1 + (ecx & 0xff); + /* Determine the number of cores sharing an L1 data cache to adjust the + * hyper-threading bit. */ + uint32_t cores_per_cache = 0; + if (is_amd) { + /* AMD CPUs never share an L1 data cache between threads but do set the HTT + * bit on multi-core CPUs. */ + cores_per_cache = 1; + } else if (num_ids >= 4) { + /* TODO(davidben): The Intel manual says this CPUID leaf enumerates all + * caches using ECX and doesn't say which is first. Does this matter? */ + OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4); + cores_per_cache = 1 + ((eax >> 14) & 0xfff); + } - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1); + OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1); - /* Correct the hyper-threading bit. */ - if (edx & (1 << 28)) { - /* See AMD CPUID manual (2.34), pages 11 and 13. */ - uint32_t num_logical_cores = (ebx >> 16) & 0xff; - if (num_logical_cores <= num_physical_cores) { - edx &= ~(1 << 28); - } - } - } else { - uint32_t cores_per_cache = 0; - if (num_ids >= 4) { - /* TODO(davidben): The Intel manual says this CPUID leaf enumerates all - * caches using ECX and doesn't say which is first. Does this matter? */ - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4); - cores_per_cache = 1 + ((eax >> 14) & 0xfff); - } - - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1); - - /* Correct the hyper-threading bit if the data cache isn't shared between - * logical cores. */ - if (edx & (1 << 28)) { - uint32_t num_logical_cores = (ebx >> 16) & 0xff; - if (cores_per_cache == 1 || num_logical_cores <= 1) { - edx &= ~(1 << 28); - } + /* Adjust the hyper-threading bit. */ + if (edx & (1 << 28)) { + uint32_t num_logical_cores = (ebx >> 16) & 0xff; + if (cores_per_cache == 1 || num_logical_cores <= 1) { + edx &= ~(1 << 28); } }