Simplify the AMD-specific codepath.
See TODO comment being removed. Change-Id: I92ce7018f88c24b3e2e61441397fda36b977d3b8 Reviewed-on: https://boringssl-review.googlesource.com/5435 Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
parent
7315251d4e
commit
f21fe46764
@ -63,7 +63,6 @@
|
|||||||
|
|
||||||
#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
|
#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -154,13 +153,12 @@ void OPENSSL_cpuid_setup(void) {
|
|||||||
ecx == 0x444d4163 /* cAMD */;
|
ecx == 0x444d4163 /* cAMD */;
|
||||||
|
|
||||||
int has_amd_xop = 0;
|
int has_amd_xop = 0;
|
||||||
uint32_t num_amd_extended_ids = 0;
|
|
||||||
if (is_amd) {
|
if (is_amd) {
|
||||||
/* AMD-specific logic.
|
/* AMD-specific logic.
|
||||||
* See http://developer.amd.com/wordpress/media/2012/10/254811.pdf */
|
* See http://developer.amd.com/wordpress/media/2012/10/254811.pdf */
|
||||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000);
|
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000);
|
||||||
num_amd_extended_ids = eax;
|
uint32_t num_extended_ids = eax;
|
||||||
if (num_amd_extended_ids >= 0x80000001) {
|
if (num_extended_ids >= 0x80000001) {
|
||||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001);
|
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001);
|
||||||
if (ecx & (1 << 11)) {
|
if (ecx & (1 << 11)) {
|
||||||
has_amd_xop = 1;
|
has_amd_xop = 1;
|
||||||
@ -174,47 +172,27 @@ void OPENSSL_cpuid_setup(void) {
|
|||||||
extended_features = ebx;
|
extended_features = ebx;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Query the main feature flags and adjust the hyper-threading bit.
|
/* Determine the number of cores sharing an L1 data cache to adjust the
|
||||||
*
|
* hyper-threading bit. */
|
||||||
* TODO(davidben): Can the AMD half of logic be trimmed down? I haven't found
|
uint32_t cores_per_cache = 0;
|
||||||
* evidence that any current AMD CPUs share an L1 data cache between threads,
|
if (is_amd) {
|
||||||
* and the CPUID manual (see section 3) suggests this code will always clear
|
/* AMD CPUs never share an L1 data cache between threads but do set the HTT
|
||||||
* HTT on AMD. Only aes-586.pl queries this, so hopefully any future CPUs will
|
* bit on multi-core CPUs. */
|
||||||
* use better implementations anyway. */
|
cores_per_cache = 1;
|
||||||
if (num_amd_extended_ids >= 0x80000008) {
|
} else if (num_ids >= 4) {
|
||||||
/* See AMD CPUID manual (2.34), page 27. */
|
/* TODO(davidben): The Intel manual says this CPUID leaf enumerates all
|
||||||
assert(is_amd);
|
* caches using ECX and doesn't say which is first. Does this matter? */
|
||||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000008);
|
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
|
||||||
uint32_t num_physical_cores = 1 + (ecx & 0xff);
|
cores_per_cache = 1 + ((eax >> 14) & 0xfff);
|
||||||
|
}
|
||||||
|
|
||||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
|
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
|
||||||
|
|
||||||
/* Correct the hyper-threading bit. */
|
/* Adjust the hyper-threading bit. */
|
||||||
if (edx & (1 << 28)) {
|
if (edx & (1 << 28)) {
|
||||||
/* See AMD CPUID manual (2.34), pages 11 and 13. */
|
uint32_t num_logical_cores = (ebx >> 16) & 0xff;
|
||||||
uint32_t num_logical_cores = (ebx >> 16) & 0xff;
|
if (cores_per_cache == 1 || num_logical_cores <= 1) {
|
||||||
if (num_logical_cores <= num_physical_cores) {
|
edx &= ~(1 << 28);
|
||||||
edx &= ~(1 << 28);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
uint32_t cores_per_cache = 0;
|
|
||||||
if (num_ids >= 4) {
|
|
||||||
/* TODO(davidben): The Intel manual says this CPUID leaf enumerates all
|
|
||||||
* caches using ECX and doesn't say which is first. Does this matter? */
|
|
||||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
|
|
||||||
cores_per_cache = 1 + ((eax >> 14) & 0xfff);
|
|
||||||
}
|
|
||||||
|
|
||||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
|
|
||||||
|
|
||||||
/* Correct the hyper-threading bit if the data cache isn't shared between
|
|
||||||
* logical cores. */
|
|
||||||
if (edx & (1 << 28)) {
|
|
||||||
uint32_t num_logical_cores = (ebx >> 16) & 0xff;
|
|
||||||
if (cores_per_cache == 1 || num_logical_cores <= 1) {
|
|
||||||
edx &= ~(1 << 28);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user