From patchwork Wed Apr 29 15:10:49 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Senkevich X-Patchwork-Id: 466145 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id D04A4140320 for ; Thu, 30 Apr 2015 01:11:32 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass reason="1024-bit key; unprotected key" header.d=sourceware.org header.i=@sourceware.org header.b=JkzuOr+f; dkim-adsp=none (unprotected policy); dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:mime-version:from:date:message-id:subject:to :content-type; q=dns; s=default; b=gVw2XyHt2xtsSKNugjNLyrvcA7uXU FUh005LohZibfM+LzADOmANnN/9qLUA+GMcKBKrz9eVgwcj8wqY1hMMZiIINLezI 3f6UMAnAblguSRl6oE0dStr1SU0M+pI517BUR2gHLBhJUnqRNVQUBAmdGFWbbS9h TUi/OVmp7ZJc30= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:mime-version:from:date:message-id:subject:to :content-type; s=default; bh=I8yb6J9CDmMRD8RlEcWNfognV9Y=; b=Jkz uOr+fbFqSOr2FxZ6Y9DcQ2Hh2re4u3gTXKsfhPA2rT+EI6wMP1QE5NHhriV2rYOk f8/32Tu2Imhto2Bg/YkygMn34PoYmep7cbVeUCZifoFLIT/EcQTMuHeyY9WeSOp6 oU3yN7XWZZG6Nhj8JjsmStEpqNw7OU0HoGsLyCZs= Received: (qmail 120120 invoked by alias); 29 Apr 2015 15:11:26 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 120109 invoked by uid 89); 29 Apr 2015 
15:11:25 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.0 required=5.0 tests=AWL, BAYES_00, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-lb0-f171.google.com X-Received: by 10.152.2.227 with SMTP id 3mr13014549lax.73.1430320280075; Wed, 29 Apr 2015 08:11:20 -0700 (PDT) MIME-Version: 1.0 From: Andrew Senkevich Date: Wed, 29 Apr 2015 18:10:49 +0300 Message-ID: Subject: [PATCH] [x86_64] Detection of availability of AVX512F and AVX512DQ ISAs To: libc-alpha Hi, this patch adds detection of the availability of the AVX512F and AVX512DQ ISAs. 2015-04-29 Andrew Senkevich * sysdeps/x86_64/multiarch/init-arch.h (bit_AVX512F_Usable, bit_AVX512DQ_Usable): New macros. * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Check and set bit_AVX512F_Usable, bit_AVX512DQ_Usable. (AVX_Fast_Unaligned_Load) Ok for trunk? --- WBR, Andrew diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 7dec218..6a81d48 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -179,6 +179,19 @@ __init_cpu_features (void) if (CPUID_AVX2) __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & 0xe0) == 0xe0 ) + { + /* Determine if AVX512F is usable. */ + if (CPUID_AVX512F) + __cpu_features.feature[index_AVX512F_Usable] + |= bit_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (CPUID_AVX512DQ) + __cpu_features.feature[index_AVX512DQ_Usable] + |= bit_AVX512DQ_Usable; + } /* Determine if FMA is usable. 
*/ if (CPUID_FMA) __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index e6b5ba5..b4b445e 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -26,6 +26,8 @@ #define bit_Slow_SSE4_2 (1 << 9) #define bit_AVX2_Usable (1 << 10) #define bit_AVX_Fast_Unaligned_Load (1 << 11) +#define bit_AVX512F_Usable (1 << 12) +#define bit_AVX512DQ_Usable (1 << 13) /* CPUID Feature flags. */ @@ -43,6 +45,8 @@ /* COMMON_CPUID_INDEX_7. */ #define bit_RTM (1 << 11) #define bit_AVX2 (1 << 5) +#define bit_AVX512F (1 << 16) +#define bit_AVX512DQ (1 << 17) /* XCR0 Feature flags. */ #define bit_XMM_state (1 << 1) @@ -76,6 +80,8 @@ # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE # define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -152,6 +158,10 @@ extern const struct cpu_features *__get_cpu_features (void) HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) # define CPUID_AVX2 \ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) +# define CPUID_AVX512F \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F) +# define CPUID_AVX512DQ \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ) /* HAS_* evaluates to true if we may use the feature at runtime. 
*/ # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) @@ -172,6 +182,8 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_Slow_SSE4_2 FEATURE_INDEX_1 # define index_AVX2_Usable FEATURE_INDEX_1 # define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_AVX512F_Usable FEATURE_INDEX_1 +# define index_AVX512DQ_Usable FEATURE_INDEX_1 # define HAS_ARCH_FEATURE(name) \ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) @@ -182,6 +194,8 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) # define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) # define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable) +# define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable) +# define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable) # define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) # define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) # define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE