From 87b133a3df55e4e444f893a354f01e10e7557ac6 Mon Sep 17 00:00:00 2001
From: Erich Elsen <eriche@google.com>
Date: Mon, 22 May 2017 18:08:58 -0700
Subject: [PATCH 1/2] add tunable for non temporal store. slightly refactor
cache info code to allow for the possibility of calling the implementation.
---
elf/dl-tunables.list | 7 ++++
sysdeps/x86/cacheinfo.c | 95 +++++++++++++++++++++++++++++++++++++++----------
2 files changed, 84 insertions(+), 18 deletions(-)
@@ -30,6 +30,13 @@
# NONE: Read all the time.
glibc {
+ x86_cache {
+ x86_shared_non_temporal_threshold {
+ type: SIZE_T
+ env_alias: SHARED_NON_TEMPORAL_THRESHOLD
+ security_level: SXID_IGNORE
+ }
+ }
malloc {
check {
type: INT_32
@@ -23,6 +23,15 @@
#include <cpuid.h>
#include <init-arch.h>
+#if HAVE_TUNABLES
+# define TUNABLE_NAMESPACE x86_cache
+#else
+ #include <string.h>
+ extern char **_environ;
+#endif
+#include <elf/dl-tunables.h>
+
+
#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
@@ -128,7 +137,7 @@ intel_02_known_compare (const void *p1, const void *p2)
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
- bool *no_level_2_or_3)
+ bool *no_level_2_or_3, const struct cpu_features* x86_cpu_features)
{
if ((value & 0x80000000) != 0)
/* The register value is reserved. */
@@ -206,8 +215,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* Intel reused this value. For family 15, model 6 it
specifies the 3rd level cache. Otherwise the 2nd
level cache. */
- unsigned int family = GLRO(dl_x86_cpu_features).family;
- unsigned int model = GLRO(dl_x86_cpu_features).model;
+ unsigned int family = x86_cpu_features->family;
+ unsigned int model = x86_cpu_features->model;
if (family == 15 && model == 6)
{
@@ -257,7 +266,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
static long int __attribute__ ((noinline))
-handle_intel (int name, unsigned int maxidx)
+handle_intel (int name, unsigned int maxidx,
+ const struct cpu_features* x86_cpu_features)
{
/* Return -1 for older CPUs. */
if (maxidx < 2)
@@ -289,19 +299,23 @@ handle_intel (int name, unsigned int maxidx)
}
/* Process the individual registers' value. */
- result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
+ result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3,
+ x86_cpu_features);
if (result != 0)
return result;
- result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
+ result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3,
+ x86_cpu_features);
if (result != 0)
return result;
- result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
+ result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3,
+ x86_cpu_features);
if (result != 0)
return result;
- result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
+ result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3,
+ x86_cpu_features);
if (result != 0)
return result;
}
@@ -437,7 +451,7 @@ attribute_hidden
__cache_sysconf (int name)
{
if (is_intel)
- return handle_intel (name, max_cpuid);
+ return handle_intel (name, max_cpuid, &GLRO(dl_x86_cpu_features));
if (is_amd)
return handle_amd (name);
@@ -475,9 +489,9 @@ int __x86_prefetchw attribute_hidden;
#endif
-static void
-__attribute__((constructor))
-init_cacheinfo (void)
+void
+attribute_hidden
+__init_cacheinfo_impl (const struct cpu_features* x86_cpu_features)
{
/* Find out what brand of processor. */
unsigned int eax;
@@ -492,14 +506,17 @@ init_cacheinfo (void)
if (is_intel)
{
- data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
+ data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid,
+ x86_cpu_features);
- long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
+ long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid,
+ x86_cpu_features);
bool inclusive_cache = true;
/* Try L3 first. */
level = 3;
- shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
+ shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid,
+ x86_cpu_features);
/* Number of logical processors sharing L2 cache. */
int threads_l2;
@@ -529,8 +546,8 @@ init_cacheinfo (void)
highest cache level. */
if (max_cpuid >= 4)
{
- unsigned int family = GLRO(dl_x86_cpu_features).family;
- unsigned int model = GLRO(dl_x86_cpu_features).model;
+ unsigned int family = x86_cpu_features->family;
+ unsigned int model = x86_cpu_features->model;
int i = 0;
@@ -673,7 +690,7 @@ intel_bug_no_cache_info:
level. */
threads
- = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
+ = ((x86_cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
>> 16) & 0xff);
}
@@ -768,4 +785,46 @@ intel_bug_no_cache_info:
shared cache size is the approximate value above which non-temporal
store becomes faster. */
__x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;
+
+#if HAVE_TUNABLES
+ TUNABLE_SET_VAL(x86_shared_non_temporal_threshold,
+ &__x86_shared_non_temporal_threshold);
+#else
+ if (__glibc_likely (_environ != NULL))
+ {
+ char **runp = _environ;
+ char *envline;
+
+ while (*runp != NULL)
+ {
+ envline = *runp;
+ runp++;
+ size_t len = strcspn (envline, "=");
+
+ if (envline[len] != '=')
+ continue;
+
+ switch (len)
+ {
+ case 29:
+ if (!__builtin_expect (__libc_enable_secure, 0))
+ {
+ if (memcmp (envline,
+ "SHARED_NON_TEMPORAL_THRESHOLD", 29) == 0)
+                  __x86_shared_non_temporal_threshold = atoi (&envline[30]);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+#endif
+}
+
+static void
+__attribute__((constructor))
+init_cacheinfo (void)
+{
+ __init_cacheinfo_impl (&GLRO(dl_x86_cpu_features));
}
--
2.13.0.219.gdb65acc882-goog