@@ -456,7 +456,8 @@ i[34567]86-*-* | x86_64-*-*)
avx10_2mediaintrin.h avx10_2-512mediaintrin.h
avx10_2convertintrin.h avx10_2-512convertintrin.h
avx10_2bf16intrin.h avx10_2-512bf16intrin.h
- avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h"
+ avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
+ avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
new file mode 100644
@@ -0,0 +1,489 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of GCC.
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED
+#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED
+
+#if !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2-512")
+#define __DISABLE_AVX10_2_512__
+#endif /* __AVX10_2_512__ */
+
+#ifdef __OPTIMIZE__
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_nepbh (__m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_nepbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_nepbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
+#else
+#define _mm512_minmax_nepbh(A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (-1)))
+
+#define _mm512_mask_minmax_nepbh(W, U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) (W), \
+ (__mmask32) (U)))
+
+#define _mm512_maskz_minmax_nepbh(U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxnepbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (U)))
+
+#define _mm512_minmax_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_minmax_round_ph(A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_minmax_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_minmax_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_ph(A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_ps(A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#endif
+
+#ifdef __DISABLE_AVX10_2_512__
+#undef __DISABLE_AVX10_2_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_512__ */
+
+#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */
new file mode 100644
@@ -0,0 +1,1063 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of GCC.
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2MINMAXINTRIN_H_INCLUDED
+#define _AVX10_2MINMAXINTRIN_H_INCLUDED
+
+#if !defined(__AVX10_2_256__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2")
+#define __DISABLE_AVX10_2_256__
+#endif /* __AVX10_2_256__ */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_nepbh (__m128bh __A, __m128bh __B, const int __C)
+{
+ return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf)(__m128bh)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_nepbh (__m128bh __W, __mmask8 __U, __m128bh __A,
+ __m128bh __B, const int __C)
+{
+ return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_nepbh (__mmask8 __U, __m128bh __A, __m128bh __B, const int __C)
+{
+ return (__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) __A,
+ (__v8bf) __B,
+ __C,
+ (__v8bf)(__m128bh)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_nepbh (__m256bh __A, __m256bh __B, const int __C)
+{
+ return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf)(__m256bh)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_nepbh (__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B,
+ const int __C)
+{
+ return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_nepbh (__mmask16 __U, __m256bh __A, __m256bh __B, const int __C)
+{
+ return (__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) __A,
+ (__v16bf) __B,
+ __C,
+ (__v16bf)(__m256bh)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_pd (__m128d __A, __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)(__m128d)
+ _mm_undefined_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_pd (__mmask8 __U, __m128d __A, __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)(__m128d)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_pd (__m256d __A, __m256d __B, const int __C)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_undefined_pd (),
+ (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B,
+ const int __C)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C, (__v4df) __W,
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_setzero_pd (),
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_round_pd (__m256d __A, __m256d __B, const int __C, const int __R)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __C, const int __R)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C, (__v4df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __C, const int __R)
+{
+ return (__m256d) __builtin_ia32_minmaxpd256_mask_round (
+ (__v4df) __A, (__v4df) __B, __C,
+ (__v4df) (__m256d) _mm256_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_ph (__m128h __A, __m128h __B, const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)(__m128h)
+ _mm_undefined_ph (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_ph (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_ph (__mmask8 __U, __m128h __A, __m128h __B, const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)(__m128h)
+ _mm_setzero_ph (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_ph (__m256h __A, __m256h __B, const int __C)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_undefined_ph (),
+ (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B,
+ const int __C)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C, (__v16hf) __W,
+ (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_setzero_ph (),
+ (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_round_ph (__m256h __A, __m256h __B, const int __C, const int __R)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_undefined_ph (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
+ __m256h __B, const int __C, const int __R)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C, (__v16hf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
+ const int __C, const int __R)
+{
+ return (__m256h) __builtin_ia32_minmaxph256_mask_round (
+ (__v16hf) __A, (__v16hf) __B, __C,
+ (__v16hf) (__m256h) _mm256_setzero_ph (),
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_ps (__m128 __A, __m128 __B, const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)(__m128)
+ _mm_undefined_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_ps (__mmask8 __U, __m128 __A, __m128 __B, const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)(__m128)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_ps (__m256 __A, __m256 __B, const int __C)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_undefined_ps (),
+ (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
+ const int __C)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W,
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_setzero_ps (),
+ (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_minmax_round_ps (__m256 __A, __m256 __B, const int __C, const int __R)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_undefined_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_minmax_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
+ const int __C, const int __R)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_minmax_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ const int __C, const int __R)
+{
+ return (__m256) __builtin_ia32_minmaxps256_mask_round (
+ (__v8sf) __A, (__v8sf) __B, __C,
+ (__v8sf) (__m256) _mm256_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_sd (__m128d __A, __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __C)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_round_sd (__m128d __A, __m128d __B, const int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_sh (__m128h __A, __m128h __B, const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_undefined_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_sh (__mmask8 __U, __m128h __A, __m128h __B,
+ const int __C)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_setzero_ph (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_round_sh (__m128h __A, __m128h __B, const int __C, const int __R)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_undefined_ph (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __C, const int __R)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
+ const int __C, const int __R)
+{
+ return (__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __C,
+ (__v8hf)
+ _mm_setzero_ph (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_ss (__m128 __A, __m128 __B, const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_undefined_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minmax_round_ss (__m128 __A, __m128 __B, const int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_undefined_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_minmax_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_minmax_nepbh(A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) \
+ _mm_setzero_si128 (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_nepbh(W, U, A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_nepbh(U, A, B, C) \
+ ((__m128bh) __builtin_ia32_minmaxnepbf16128_mask ((__v8bf) (A), \
+ (__v8bf) (B), \
+ (int) (C), \
+ (__v8bf) (__m128bh) \
+ _mm_setzero_si128 (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_nepbh(A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) \
+ _mm256_setzero_si256 (), \
+ (__mmask16) (-1)))
+
+#define _mm256_mask_minmax_nepbh(W, U, A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) (W), \
+ (__mmask16) (U)))
+
+#define _mm256_maskz_minmax_nepbh(U, A, B, C) \
+ ((__m256bh) __builtin_ia32_minmaxnepbf16256_mask ((__v16bf) (A), \
+ (__v16bf) (B), \
+ (int) (C), \
+ (__v16bf) (__m256bh) \
+ _mm256_setzero_si256 (), \
+ (__mmask16) (U)))
+
+#define _mm_minmax_pd(A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_undefined_pd (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_pd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_pd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_setzero_pd (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_pd(A, B, C) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_mask_minmax_pd(W, U, A, B, C) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_maskz_minmax_pd(U, A, B, C) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_minmax_round_pd(A, B, C, R) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m256d) __builtin_ia32_minmaxpd256_mask_round ((__v4df) (A), \
+ (__v4df) (B), \
+ (int) (C), \
+ (__v4df) (__m256d) \
+ _mm256_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_ph(A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_undefined_ph (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_ph(W, U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_ph(U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_setzero_ph (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_ph(A, B, C) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_undefined_ph (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_mask_minmax_ph(W, U, A, B, C) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_maskz_minmax_ph(U, A, B, C) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_setzero_ph (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_minmax_round_ph(A, B, C, R) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_undefined_ph (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m256h) __builtin_ia32_minmaxph256_mask_round ((__v16hf) (A), \
+ (__v16hf) (B), \
+ (int) (C), \
+ (__v16hf) (__m256h) \
+ _mm256_setzero_ph (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm_minmax_ps(A, B, C) \
+ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_undefined_ps (), \
+ (__mmask8) (-1)))
+
+#define _mm_mask_minmax_ps(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) (W), \
+ (__mmask8) (U)))
+
+#define _mm_maskz_minmax_ps(U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_setzero_ps (), \
+ (__mmask8) (U)))
+
+#define _mm256_minmax_ps(A, B, C) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_undefined_ps (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_mask_minmax_ps(W, U, A, B, C) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_maskz_minmax_ps(U, A, B, C) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_setzero_ps (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm256_minmax_round_ps(A, B, C, R) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_undefined_ps (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m256) __builtin_ia32_minmaxps256_mask_round ((__v8sf) (A), \
+ (__v8sf) (B), \
+ (int) (C), \
+ (__v8sf) (__m256) \
+ _mm256_setzero_ps (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm_mask_minmax_round_sd(W, U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_maskz_minmax_round_sd(U, A, B, C, R) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_round_sh(A, B, C, R) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_undefined_ph (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm_mask_minmax_round_sh(W, U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_maskz_minmax_round_sh(U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_setzero_ph (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_undefined_ps (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm_mask_minmax_round_ss(W, U, A, B, C, R) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_maskz_minmax_round_ss(U, A, B, C, R) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_setzero_ps (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm_minmax_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
+ (__v2df) (B), \
+ (int) (C), \
+ (__v2df) (__m128d) \
+ _mm_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_sh(A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_undefined_ph (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_sh(W, U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_sh(U, A, B, C) \
+ ((__m128h) __builtin_ia32_minmaxsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (int) (C), \
+ (__v8hf) (__m128h) \
+ _mm_setzero_ph (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_minmax_ss(A, B, C) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_undefined_ps (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_minmax_ss(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_minmax_ss(U, A, B, C) \
+ ((__m128) __builtin_ia32_minmaxss_mask_round ((__v4sf) (A), \
+ (__v4sf) (B), \
+ (int) (C), \
+ (__v4sf) (__m128) \
+ _mm_setzero_ps (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#endif
+
+#ifdef __DISABLE_AVX10_2_256__
+#undef __DISABLE_AVX10_2_256__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_256__ */
+
+#endif /* _AVX10_2MINMAXINTRIN_H_INCLUDED */
@@ -1499,3 +1499,11 @@ DEF_FUNCTION_TYPE (V32HI, V32BF, V32HI, USI)
DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, UHI, INT)
DEF_FUNCTION_TYPE (V16HI, V16BF, V16HI, UHI, INT)
DEF_FUNCTION_TYPE (V32HI, V32BF, V32HI, USI, INT)
+DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, INT, V8BF, UQI)
+DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF, INT, V16BF, UHI)
+DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF, INT, V32BF, USI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
+DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
@@ -3298,6 +3298,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv4sf_mask, "
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv2di_mask, "__builtin_ia32_cvttps2qqs128_mask", IX86_BUILTIN_VCVTTPS2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv4sf_mask, "__builtin_ia32_cvttps2udqs128_mask", IX86_BUILTIN_VCVTTPS2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv2di_mask, "__builtin_ia32_cvttps2uqqs128_mask", IX86_BUILTIN_VCVTTPS2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxnepbf16_v8bf_mask, "__builtin_ia32_minmaxnepbf16128_mask", IX86_BUILTIN_MINMAXNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxnepbf16_v16bf_mask, "__builtin_ia32_minmaxnepbf16256_mask", IX86_BUILTIN_MINMAXNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxnepbf16_v32bf_mask, "__builtin_ia32_minmaxnepbf16512_mask", IX86_BUILTIN_MINMAXNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv2df_mask, "__builtin_ia32_minmaxpd128_mask", IX86_BUILTIN_MINMAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8hf_mask, "__builtin_ia32_minmaxph128_mask", IX86_BUILTIN_MINMAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4sf_mask, "__builtin_ia32_minmaxps128_mask", IX86_BUILTIN_MINMAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@@ -3774,7 +3780,6 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv8sf_mask_rou
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2ibsv16sf_mask_round, "__builtin_ia32_cvttps2ibs512_mask_round", IX86_BUILTIN_CVTTPS2IBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv8sf_mask_round, "__builtin_ia32_cvttps2iubs256_mask_round", IX86_BUILTIN_CVTTPS2IUBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2iubsv16sf_mask_round, "__builtin_ia32_cvttps2iubs512_mask_round", IX86_BUILTIN_CVTTPS2IUBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT)
-
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv4df_mask_round, "__builtin_ia32_cvttpd2dqs256_mask_round", IX86_BUILTIN_VCVTTPD2DQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2dqsv8df_mask_round, "__builtin_ia32_cvttpd2dqs512_mask_round", IX86_BUILTIN_VCVTTPD2DQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv4df_mask_round, "__builtin_ia32_cvttpd2qqs256_mask_round", IX86_BUILTIN_VCVTTPD2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI_INT)
@@ -3799,6 +3804,15 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sissi_round, "_
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sisdi_round, "__builtin_ia32_cvttss2sis64_round", IX86_BUILTIN_VCVTTSS2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usissi_round, "__builtin_ia32_cvttss2usis32_round", IX86_BUILTIN_VCVTTSS2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usisdi_round, "__builtin_ia32_cvttss2usis64_round", IX86_BUILTIN_VCVTTSS2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv8df_mask_round, "__builtin_ia32_minmaxpd512_mask_round", IX86_BUILTIN_MINMAXPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv32hf_mask_round, "__builtin_ia32_minmaxph512_mask_round", IX86_BUILTIN_MINMAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_minmaxpv16sf_mask_round, "__builtin_ia32_minmaxps512_mask_round", IX86_BUILTIN_MINMAXPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv4df_mask_round, "__builtin_ia32_minmaxpd256_mask_round", IX86_BUILTIN_MINMAXPD256_MASK_ROUND, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv16hf_mask_round, "__builtin_ia32_minmaxph256_mask_round", IX86_BUILTIN_MINMAXPH256_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxpv8sf_mask_round, "__builtin_ia32_minmaxps256_mask_round", IX86_BUILTIN_MINMAXPS256_MASK_ROUND, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv2df_mask_round, "__builtin_ia32_minmaxsd_mask_round", IX86_BUILTIN_MINMAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv8hf_mask_round, "__builtin_ia32_minmaxsh_mask_round", IX86_BUILTIN_MINMAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_minmaxsv4sf_mask_round, "__builtin_ia32_minmaxss_mask_round", IX86_BUILTIN_MINMAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
@@ -11954,6 +11954,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
+ case V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI:
+ case V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI:
+ case V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI:
+ case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI:
nargs = 5;
mask_pos = 1;
nargs_constant = 2;
@@ -12604,6 +12608,10 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT:
+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT:
+ case V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT:
+ case V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT:
nargs = 6;
nargs_constant = 4;
break;
@@ -155,4 +155,9 @@
#include <avx10_2satcvtintrin.h>
#include <avx10_2-512satcvtintrin.h>
+
+#include <avx10_2minmaxintrin.h>
+
+#include <avx10_2-512minmaxintrin.h>
+
#endif /* _IMMINTRIN_H_INCLUDED */
@@ -249,6 +249,8 @@
UNSPEC_VCVTTPS2IUBS
UNSPEC_SFIX_SATURATION
UNSPEC_UFIX_SATURATION
+ UNSPEC_MINMAXNEPBF16
+ UNSPEC_MINMAX
])
(define_c_enum "unspecv" [
@@ -501,6 +503,11 @@
(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator VFH_AVX10_2
+ [(V32HF "TARGET_AVX10_2_512") V16HF V8HF
+ (V16SF "TARGET_AVX10_2_512") V8SF V4SF
+ (V8DF "TARGET_AVX10_2_512") V4DF V2DF])
+
(define_mode_iterator VF2_AVX512VL
[(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
@@ -32388,3 +32395,42 @@
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_minmaxnepbf16_<mode><mask_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (unspec:VBF_AVX10_2
+ [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
+ (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_MINMAXNEPBF16))]
+ "TARGET_AVX10_2_256"
+ "vminmaxnepbf16\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v")
+ (unspec:VFH_AVX10_2
+ [(match_operand:VFH_AVX10_2 1 "register_operand" "v")
+ (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_MINMAX))]
+ "TARGET_AVX10_2_256"
+ "vminmax<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_minmaxs<mode><mask_scalar_name><round_saeonly_scalar_name>"
+ [(set (match_operand:VFH_128 0 "register_operand" "=v")
+ (vec_merge:VFH_128
+ (unspec:VFH_128
+ [(match_operand:VFH_128 1 "register_operand" "v")
+ (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_MINMAX)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX10_2_256"
+ "vminmax<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
@@ -1081,6 +1081,25 @@
#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
#endif
+/* avx10_2-512minmaxintrin.h */
+#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 4, W, U)
+
+/* avx10_2minmaxintrin.h */
+#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 4, D, E, 4)
+
#include <wmmintrin.h>
#include <immintrin.h>
#include <mm3dnow.h>
new file mode 100644
@@ -0,0 +1,257 @@
+#ifndef AVX10MINMAX_HELPERFUNC_INCLUDED
+#define AVX10MINMAX_HELPERFUNC_INCLUDED
+
+#include <math.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <float.h>
+#include "avx512f-helper.h"
+#define SNAN_float __builtin_nansf ("")
+#define SNAN_flag_float 0x7fa00000
+#define QNAN_float __builtin_nanf ("")
+#define QNAN_flag_float 0x7fc00000
+#define SNAN_double ((double)__builtin_nans (""))
+#define SNAN_flag_double 0x7ff4000000000000
+#define QNAN_double ((double)__builtin_nan (""))
+#define QNAN_flag_double 0x7ff8000000000000
+#define SNAN__Float16 ((_Float16)__builtin_nansf16 (""))
+#define SNAN_flag__Float16 0x7d00
+#define QNAN__Float16 ((_Float16)__builtin_nanf16 (""))
+#define QNAN_flag__Float16 0x7e00
+#define SNAN___bf16 ((__bf16)__builtin_nansf16b (""))
+#define SNAN_flag___bf16 0x7fa0
+#define QNAN___bf16 ((__bf16)__builtin_nanf (""))
+#define QNAN_flag___bf16 0x7fc0
+#define ISNAN(x) (x != x)
+#define ABS_float(x) fabsf (x)
+#define ABS_double(x) fabs (x)
+#define ABS__Float16(x) __builtin_fabsf16 (x)
+#define ABS___bf16(x) __builtin_fabsf (x)
+
+#define Union_Data(typef, typei) \
+typedef union \
+{ \
+ typef f; \
+ typei i; \
+} union_##typef;
+
+Union_Data(float, int)
+Union_Data(double, long long)
+Union_Data(__bf16, short)
+Union_Data(_Float16, short)
+
+#define IS_SNAN(union_x, type) ((union_x.i & SNAN_flag_##type) == union_snan.i)
+
+#define IS_QNAN(union_x, type) ((union_x.i & QNAN_flag_##type) == union_qnan.i)
+
+#define CHECK_EXP_MINMAX(UNION_TYPE, VALUE_TYPE, INT_TYPE) \
+static int \
+__attribute__((noinline, unused)) \
+check_minmax_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE *v) \
+{ \
+ int i; \
+ int err = 0; \
+ for (i = 0; i < ARRAY_SIZE (u.a); i++) \
+ { \
+ union_##VALUE_TYPE union_x, union_y; \
+ union_x.f = u.a[i]; \
+ union_y.f = v[i]; \
+ if (union_x.i != union_y.i) \
+ { \
+ err++; \
+ PRINTF ("%i: " "%f" " != " "%f" "\n", \
+ i, v[i], u.a[i]); \
+ } \
+ } \
+ return err; \
+}
+
+#if defined (AVX10_512BIT)
+CHECK_EXP_MINMAX (union512, float, int)
+CHECK_EXP_MINMAX (union512d, double, long int)
+CHECK_EXP_MINMAX (union512bf16_bf, __bf16, short int)
+CHECK_EXP_MINMAX (union512h, _Float16, short int)
+#endif
+CHECK_EXP_MINMAX (union256, float, int)
+CHECK_EXP_MINMAX (union256d, double, long int)
+CHECK_EXP_MINMAX (union128, float, int)
+CHECK_EXP_MINMAX (union128d, double, long int)
+CHECK_EXP_MINMAX (union256bf16_bf, __bf16, short int)
+CHECK_EXP_MINMAX (union128bf16_bf, __bf16, short int)
+CHECK_EXP_MINMAX (union256h, _Float16, short int)
+CHECK_EXP_MINMAX (union128h, _Float16, short int)
+
+#define UNION_CHECK_MINMAX(SIZE, NAME) EVAL(check_minmax_union, SIZE, NAME)
+
+#define CMP(res, x, y, type, value, op1, np, op2, zero, num, mag) \
+{ \
+ union_##type union_a, union_b; \
+ union_a.f = x; \
+ union_b.f = y; \
+ union_##type union_snan, union_qnan; \
+ union_snan.f = SNAN_##type; \
+ union_qnan.f = QNAN_##type; \
+ bool flag = false; \
+ if(num) \
+ { \
+ if(ISNAN(x) && ISNAN(y)) \
+ { \
+ if(IS_SNAN(union_a,type) || (IS_QNAN(union_a,type) && IS_QNAN(union_b,type))) \
+ { \
+ union_a.i |= value; \
+ res = union_a.f; \
+ flag = true; \
+ } \
+ else \
+ { \
+ union_b.i |= value; \
+ res = union_b.f; \
+ flag = true; \
+ } \
+ } \
+ else if(ISNAN(x)) \
+ { \
+ res = y; \
+ flag = true; \
+ } \
+ else if(ISNAN(y)) \
+ { \
+ res = x; \
+ flag = true; \
+ } \
+ } \
+ else \
+ { \
+ if(IS_SNAN(union_a,type) || (IS_QNAN(union_a,type) && !IS_SNAN(union_b,type))) \
+ { \
+ union_a.i |= value; \
+ res = union_a.f; \
+ flag = true; \
+ } \
+ else if(ISNAN(y)) \
+ { \
+ union_b.i |= value; \
+ res = union_b.f; \
+ flag = true; \
+ } \
+ } \
+ if(!flag) \
+ { \
+ if(!mag) \
+ { \
+ if((x == zero && y == - zero) || (x == - zero && y == zero)) \
+ res = np zero; \
+ else if(x op1 y) \
+ res = x; \
+ else \
+ res = y; \
+ } \
+ else \
+ { \
+ if(ABS_##type(x) op2 ABS_##type(y)) \
+ res = x; \
+ else if(ABS_##type(y) op2 ABS_##type(x)) \
+ res = y; \
+ else \
+ { \
+ if((x == zero && y == - zero) || (x == - zero && y == zero)) \
+ res = np zero; \
+ else if(x op1 y) \
+ res = x; \
+ else \
+ res = y; \
+ } \
+ } \
+ } \
+}
+
+#define MINMAX(type, value, zero) \
+type \
+minmax_##type (type * a, type * b, int imm) \
+{ \
+ int op_select = imm & 0x03; \
+ int sign_control = (imm & 0x0C) >> 2; \
+ int nan_prop_select = (imm & 0x10) >> 4; \
+ type tmp; \
+ if(nan_prop_select == 0) \
+ if(op_select == 0) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, false, false) \
+ else if(op_select == 1) \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, false, false) \
+ else if(op_select == 2) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, false, true) \
+ else \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, false, true) \
+ else \
+ if(op_select == 0) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, true, false) \
+ else if(op_select == 1) \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, true, false) \
+ else if(op_select == 2) \
+ CMP(tmp, *a, *b, type, value, <=, -, <, zero, true, true) \
+ else \
+ CMP(tmp, *a, *b, type, value, >=, +, >, zero, true, true) \
+ /* Sign control (imm8[3:2]): 0 = take src1's sign, 1 = keep result's
+ sign, 2 = force positive, 3 = force negative. Braces are required:
+ without them the "else if" arms dangle onto the inner ifs and the
+ sign_control == 2/3 cases never execute. */ \
+ if(!ISNAN(tmp)) \
+ { if(sign_control == 0 && !ISNAN(*a)) \
+ { if((tmp < 0 && *a > 0) || (tmp > 0 && *a < 0)) tmp = -tmp; } \
+ else if(sign_control == 2) { if(tmp < 0) tmp = -tmp; } \
+ else if(sign_control == 3) { if(tmp > 0) tmp = -tmp; } } \
+ return tmp; \
+}
+
+
+MINMAX(double, 0x7ff8000000000000, 0.0)
+MINMAX(float, 0x7fc00000, 0.0f)
+MINMAX(_Float16, 0x7e00, 0.0f16)
+MINMAX(__bf16, 0x7fc0, 0.0bf16)
+
+#define UNIT_TEST(R, InsnSuffix, MaskType, type) \
+ sign = -1; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ src1.a[i] = i % 2 ? SNAN_##type : 1.5 + 34.67 * i * sign; \
+ src2.a[i] = i % 3 ? QNAN_##type : -22.17 * i * sign; \
+ sign = sign * -1; \
+ } \
+ for (i = 0; i < SIZE; i++) \
+ res2.a[i] = DEFAULT_VALUE; \
+ res1.x = INTRINSIC(_minmax_##InsnSuffix) (src1.x, src2.x, R); \
+ res2.x = INTRINSIC(_mask_minmax_##InsnSuffix) (res2.x, mask, src1.x, src2.x, R); \
+ res3.x = INTRINSIC(_maskz_minmax_##InsnSuffix) (mask, src1.x, src2.x, R); \
+ CALC (res_ref, src1.a, src2.a, R); \
+ if (UNION_CHECK_MINMAX (AVX512F_LEN, MaskType) (res1, res_ref)) \
+ abort(); \
+ MASK_MERGE (MaskType) (res_ref, mask, SIZE); \
+ if (UNION_CHECK_MINMAX (AVX512F_LEN, MaskType) (res2, res_ref)) \
+ abort(); \
+ MASK_ZERO (MaskType) (res_ref, mask, SIZE); \
+ if (UNION_CHECK_MINMAX (AVX512F_LEN, MaskType) (res3, res_ref)) \
+ abort();
+
+#define SCALAR_UNIT_TEST(R, InsnSuffix, MaskType, type) \
+ sign = -1; \
+ for (i = 0; i < SIZE; i++) \
+ { \
+ src1.a[i] = i % 2 ? SNAN_##type : 1.5 + 34.67 * i * sign; \
+ src2.a[i] = i % 3 ? QNAN_##type : -22.17 * i * sign; \
+ sign = sign * -1; \
+ } \
+ for (i = 0; i < SIZE; i++) \
+ res2.a[i] = DEFAULT_VALUE; \
+ res1.x = _mm_minmax_##InsnSuffix (src1.x, src2.x, R); \
+ res2.x = _mm_mask_minmax_##InsnSuffix (res2.x, mask, src1.x, src2.x, R); \
+ res3.x = _mm_maskz_minmax_##InsnSuffix (mask, src1.x, src2.x, R); \
+ CALC (res_ref, src1.a, src2.a, R); \
+ if (UNION_CHECK_MINMAX (128, MaskType) (res1, res_ref)) \
+ abort(); \
+ MASK_MERGE (MaskType) (res_ref, mask, 1); \
+ if (UNION_CHECK_MINMAX (128, MaskType) (res2, res_ref)) \
+ abort(); \
+ MASK_ZERO (MaskType) (res_ref, mask, 1); \
+ if (UNION_CHECK_MINMAX (128, MaskType) (res3, res_ref)) \
+ abort();
+
+#endif
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 2 } } */
+
+
+#include <immintrin.h>
+
+volatile __m512bh x1;
+volatile __m512h x2;
+volatile __m512 x3;
+volatile __m512d x4;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx10_2_512_test (void)
+{
+ x1 = _mm512_minmax_nepbh (x1, x1, 100);
+ x1 = _mm512_mask_minmax_nepbh (x1, m32, x1, x1, 100);
+ x1 = _mm512_maskz_minmax_nepbh (m32, x1, x1, 100);
+ x2 = _mm512_minmax_ph (x2, x2, 1);
+ x2 = _mm512_mask_minmax_ph (x2, m32, x2, x2, 1);
+ x2 = _mm512_maskz_minmax_ph (m32, x2, x2, 1);
+ x2 = _mm512_minmax_round_ph (x2, x2, 1, 4);
+ x2 = _mm512_mask_minmax_round_ph (x2, m32, x2, x2, 1, 4);
+ x2 = _mm512_maskz_minmax_round_ph (m32, x2, x2, 1, 4);
+ x3 = _mm512_minmax_ps (x3, x3, 1);
+ x3 = _mm512_mask_minmax_ps (x3, m16, x3, x3, 1);
+ x3 = _mm512_maskz_minmax_ps (m16, x3, x3, 1);
+ x3 = _mm512_minmax_round_ps (x3, x3, 1, 4);
+ x3 = _mm512_mask_minmax_round_ps (x3, m16, x3, x3, 1, 4);
+ x3 = _mm512_maskz_minmax_round_ps (m16, x3, x3, 1, 4);
+ x4 = _mm512_minmax_pd (x4, x4, 100);
+ x4 = _mm512_mask_minmax_pd (x4, m8, x4, x4, 100);
+ x4 = _mm512_maskz_minmax_pd (m8, x4, x4, 100);
+ x4 = _mm512_minmax_round_pd (x4, x4, 100, 4);
+ x4 = _mm512_mask_minmax_round_pd (x4, m8, x4, x4, 100, 4);
+ x4 = _mm512_maskz_minmax_round_pd (m8, x4, x4, 100, 4);
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 16)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (__bf16 *r, __bf16 *s1, __bf16 *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax___bf16(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, bf16_bf) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ __bf16 res_ref[SIZE];
+
+ UNIT_TEST(0, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(1, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(4, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(5, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(16, nepbh, bf16_bf, __bf16);
+ UNIT_TEST(17, nepbh, bf16_bf, __bf16);
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 64)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (double *r, double *s1, double *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax_double(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ UNIT_TEST(0, pd, d, double);
+ UNIT_TEST(1, pd, d, double);
+ UNIT_TEST(4, pd, d, double);
+ UNIT_TEST(5, pd, d, double);
+ UNIT_TEST(16, pd, d, double);
+ UNIT_TEST(17, pd, d, double);
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 16)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax__Float16(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, h) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ _Float16 res_ref[SIZE];
+
+ UNIT_TEST(0, ph, h, _Float16);
+ UNIT_TEST(1, ph, h, _Float16);
+ UNIT_TEST(4, ph, h, _Float16);
+ UNIT_TEST(5, ph, h, _Float16);
+ UNIT_TEST(16, ph, h, _Float16);
+ UNIT_TEST(17, ph, h, _Float16);
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_512BIT
+#endif
+#define SIZE (AVX512F_LEN / 32)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (float *r, float *s1, float *s2, int R)
+{
+ for(int i = 0; i < SIZE; i++)
+ r[i] = minmax_float(&s1[i], &s2[i], R);
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ UNIT_TEST(0, ps, , float);
+ UNIT_TEST(1, ps, , float);
+ UNIT_TEST(4, ps, , float);
+ UNIT_TEST(5, ps, , float);
+ UNIT_TEST(16, ps, , float);
+ UNIT_TEST(17, ps, , float);
+}
new file mode 100644
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxnepbf16\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxps\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\{\n\]*\[^\}\]%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxpd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\{\n\]*\[^\}\]%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256bh y1_;
+volatile __m256h y2;
+volatile __m256 y3;
+volatile __m256d y4;
+volatile __m128bh x1;
+volatile __m128h x2;
+volatile __m128 x3;
+volatile __m128d x4;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx10_2_test (void)
+{
+ x1 = _mm_minmax_nepbh (x1, x1, 100);
+ x1 = _mm_mask_minmax_nepbh (x1, m8, x1, x1, 100);
+ x1 = _mm_maskz_minmax_nepbh (m8, x1, x1, 100);
+ y1_ = _mm256_minmax_nepbh (y1_, y1_, 100);
+ y1_ = _mm256_mask_minmax_nepbh (y1_, m16, y1_, y1_, 100);
+ y1_ = _mm256_maskz_minmax_nepbh (m16, y1_, y1_, 100);
+ x2 = _mm_minmax_ph (x2, x2, 100);
+ x2 = _mm_mask_minmax_ph (x2, m8, x2, x2, 100);
+ x2 = _mm_maskz_minmax_ph (m8, x2, x2, 100);
+ y2 = _mm256_minmax_ph (y2, y2, 100);
+ y2 = _mm256_mask_minmax_ph (y2, m16, y2, y2, 100);
+ y2 = _mm256_maskz_minmax_ph (m16, y2, y2, 100);
+ y2 = _mm256_minmax_round_ph (y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y2 = _mm256_mask_minmax_round_ph (y2, m16, y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y2 = _mm256_maskz_minmax_round_ph (m16, y2, y2, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_minmax_ps (x3, x3, 100);
+ x3 = _mm_mask_minmax_ps (x3, m8, x3, x3, 100);
+ x3 = _mm_maskz_minmax_ps (m8, x3, x3, 100);
+ y3 = _mm256_minmax_ps (y3, y3, 100);
+ y3 = _mm256_mask_minmax_ps (y3, m8, y3, y3, 100);
+ y3 = _mm256_maskz_minmax_ps (m8, y3, y3, 100);
+ y3 = _mm256_minmax_round_ps (y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y3 = _mm256_mask_minmax_round_ps (y3, m8, y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y3 = _mm256_maskz_minmax_round_ps (m8, y3, y3, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_minmax_pd (x4, x4, 100);
+ x4 = _mm_mask_minmax_pd (x4, m8, x4, x4, 100);
+ x4 = _mm_maskz_minmax_pd (m8, x4, x4, 100);
+ y4 = _mm256_minmax_pd (y4, y4, 100);
+ y4 = _mm256_mask_minmax_pd (y4, m8, y4, y4, 100);
+ y4 = _mm256_maskz_minmax_pd (m8, y4, y4, 100);
+ y4 = _mm256_minmax_round_pd (y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y4 = _mm256_mask_minmax_round_pd (y4, m8, y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ y4 = _mm256_maskz_minmax_round_pd (m8, y4, y4, 100, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x2 = _mm_minmax_sh (x2, x2, 1);
+ x2 = _mm_mask_minmax_sh (x2, m8, x2, x2, 1);
+ x2 = _mm_maskz_minmax_sh (m8, x2, x2, 1);
+ x2 = _mm_minmax_round_sh (x2, x2, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x2 = _mm_mask_minmax_round_sh (x2, m8, x2, x2, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x2 = _mm_maskz_minmax_round_sh (m8, x2, x2, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_minmax_ss (x3, x3, 1);
+ x3 = _mm_mask_minmax_ss (x3, m8, x3, x3, 1);
+ x3 = _mm_maskz_minmax_ss (m8, x3, x3, 1);
+ x3 = _mm_minmax_round_ss (x3, x3, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_mask_minmax_round_ss (x3, m8, x3, x3, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x3 = _mm_maskz_minmax_round_ss (m8, x3, x3, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_minmax_sd (x4, x4, 1);
+ x4 = _mm_mask_minmax_sd (x4, m8, x4, x4, 1);
+ x4 = _mm_maskz_minmax_sd (m8, x4, x4, 1);
+ x4 = _mm_minmax_round_sd (x4, x4, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_mask_minmax_round_sd (x4, m8, x4, x4, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x4 = _mm_maskz_minmax_round_sd (m8, x4, x4, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#include "avx10_2-512-vminmaxnepbf16-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+#include "avx10_2-512-vminmaxnepbf16-2.c"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#include "avx10_2-512-vminmaxpd-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+#include "avx10_2-512-vminmaxpd-2.c"
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
+#include "avx10_2-512-vminmaxph-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
+#include "avx10_2-512-vminmaxph-2.c"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#include "avx10_2-512-vminmaxps-2.c"
+
+#undef AVX512F_LEN
+
+#define AVX512F_LEN 128
+#include "avx10_2-512-vminmaxps-2.c"
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#define SIZE (128 / 64)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (double *r, double *s1, double *s2, int R)
+{
+ r[0] = minmax_double(&s1[0], &s2[0], R);
+ for(int i = 1; i < SIZE; i++)
+ r[i] = s1[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, d) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+
+ SCALAR_UNIT_TEST(0, sd, d, double);
+ SCALAR_UNIT_TEST(1, sd, d, double);
+ SCALAR_UNIT_TEST(4, sd, d, double);
+ SCALAR_UNIT_TEST(5, sd, d, double);
+ SCALAR_UNIT_TEST(16, sd, d, double);
+ SCALAR_UNIT_TEST(17, sd, d, double);
+}
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#define SIZE (128 / 16)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R)
+{
+ r[0] = minmax__Float16(&s1[0], &s2[0], R);
+ for(int i = 1; i < SIZE; i++)
+ r[i] = s1[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, h) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ _Float16 res_ref[SIZE];
+
+ SCALAR_UNIT_TEST(0, sh, h, _Float16);
+ SCALAR_UNIT_TEST(1, sh, h, _Float16);
+ SCALAR_UNIT_TEST(4, sh, h, _Float16);
+ SCALAR_UNIT_TEST(5, sh, h, _Float16);
+ SCALAR_UNIT_TEST(16, sh, h, _Float16);
+ SCALAR_UNIT_TEST(17, sh, h, _Float16);
+}
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-fsignaling-nans -mfpmath=sse -O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#define SIZE (128 / 32)
+#include "avx10-helper.h"
+#include <stdbool.h>
+#include "avx10-minmax-helper.h"
+
+void static
+CALC (float *r, float *s1, float *s2, int R)
+{
+ r[0] = minmax_float(&s1[0], &s2[0], R);
+ for(int i = 1; i < SIZE; i++)
+ r[i] = s1[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (128, ) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+
+ SCALAR_UNIT_TEST(0, ss, , float);
+ SCALAR_UNIT_TEST(1, ss, , float);
+ SCALAR_UNIT_TEST(4, ss, , float);
+ SCALAR_UNIT_TEST(5, ss, , float);
+ SCALAR_UNIT_TEST(16, ss, , float);
+ SCALAR_UNIT_TEST(17, ss, , float);
+}
@@ -41,6 +41,7 @@ MAKE_MASK_MERGE(i_b, char)
MAKE_MASK_MERGE(i_w, short)
MAKE_MASK_MERGE(i_d, int)
MAKE_MASK_MERGE(i_q, long long)
+MAKE_MASK_MERGE(h, _Float16)
MAKE_MASK_MERGE(, float)
MAKE_MASK_MERGE(d, double)
MAKE_MASK_MERGE(i_ub, unsigned char)
@@ -68,6 +69,7 @@ MAKE_MASK_ZERO(i_b, char)
MAKE_MASK_ZERO(i_w, short)
MAKE_MASK_ZERO(i_d, int)
MAKE_MASK_ZERO(i_q, long long)
+MAKE_MASK_ZERO(h, _Float16)
MAKE_MASK_ZERO(, float)
MAKE_MASK_ZERO(d, double)
MAKE_MASK_ZERO(i_ub, unsigned char)
@@ -1089,4 +1089,23 @@
#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
#endif
+/* avx10_2-512minmaxintrin.h */
+#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 4, W, U)
+
+/* avx10_2minmaxintrin.h */
+#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 4, D, E, 4)
+#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 4, D, E)
+#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 4, D, E, 4)
+
#include <x86intrin.h>
@@ -1541,3 +1541,70 @@ test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
#endif
+
+/* avx10_2-512minmaxintrin.h */
+test_2 (_mm512_minmax_nepbh, __m512bh, __m512bh, __m512bh, 100)
+test_3 (_mm512_maskz_minmax_nepbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_4 (_mm512_mask_minmax_nepbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_2x (_mm512_minmax_round_pd, __m512d, __m512d, __m512d, 100, 4)
+test_3x (_mm512_maskz_minmax_round_pd, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_4x (_mm512_mask_minmax_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_2x (_mm512_minmax_round_ps, __m512, __m512, __m512, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ps, __m512, __mmask16, __m512, __m512, 100, 4)
+test_4x (_mm512_mask_minmax_round_ps, __m512, __m512, __mmask16, __m512, __m512, 100, 4)
+test_2x (_mm512_minmax_round_ph, __m512h, __m512h, __m512h, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ph, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_4x (_mm512_mask_minmax_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_2 (_mm512_minmax_pd, __m512d, __m512d, __m512d, 100)
+test_3 (_mm512_maskz_minmax_pd, __m512d, __mmask8, __m512d, __m512d, 100)
+test_4 (_mm512_mask_minmax_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100)
+test_2 (_mm512_minmax_ps, __m512, __m512, __m512, 100)
+test_3 (_mm512_maskz_minmax_ps, __m512, __mmask16, __m512, __m512, 100)
+test_4 (_mm512_mask_minmax_ps, __m512, __m512, __mmask16, __m512, __m512, 100)
+test_2 (_mm512_minmax_ph, __m512h, __m512h, __m512h, 100)
+test_3 (_mm512_maskz_minmax_ph, __m512h, __mmask32, __m512h, __m512h, 100)
+test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100)
+
+/* avx10_2minmaxintrin.h */
+test_2 (_mm256_minmax_nepbh, __m256bh, __m256bh, __m256bh, 100)
+test_3 (_mm256_maskz_minmax_nepbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_4 (_mm256_mask_minmax_nepbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
+test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_2x (_mm256_minmax_round_ps, __m256, __m256, __m256, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ps, __m256, __mmask8, __m256, __m256, 100, 4)
+test_4x (_mm256_mask_minmax_round_ps, __m256, __m256, __mmask8, __m256, __m256, 100, 4)
+test_2x (_mm256_minmax_round_ph, __m256h, __m256h, __m256h, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ph, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_4x (_mm256_mask_minmax_round_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_2 (_mm256_minmax_pd, __m256d, __m256d, __m256d, 100)
+test_3 (_mm256_maskz_minmax_pd, __m256d, __mmask8, __m256d, __m256d, 100)
+test_4 (_mm256_mask_minmax_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100)
+test_2 (_mm256_minmax_ps, __m256, __m256, __m256, 100)
+test_3 (_mm256_maskz_minmax_ps, __m256, __mmask8, __m256, __m256, 100)
+test_4 (_mm256_mask_minmax_ps, __m256, __m256, __mmask8, __m256, __m256, 100)
+test_2 (_mm256_minmax_ph, __m256h, __m256h, __m256h, 100)
+test_3 (_mm256_maskz_minmax_ph, __m256h, __mmask16, __m256h, __m256h, 100)
+test_4 (_mm256_mask_minmax_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100)
+test_2 (_mm_minmax_nepbh, __m128bh, __m128bh, __m128bh, 100)
+test_3 (_mm_maskz_minmax_nepbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_4 (_mm_mask_minmax_nepbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_2 (_mm_minmax_pd, __m128d, __m128d, __m128d, 100)
+test_3 (_mm_maskz_minmax_pd, __m128d, __mmask8, __m128d, __m128d, 100)
+test_4 (_mm_mask_minmax_pd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100)
+test_2 (_mm_minmax_ps, __m128, __m128, __m128, 100)
+test_3 (_mm_maskz_minmax_ps, __m128, __mmask8, __m128, __m128, 100)
+test_4 (_mm_mask_minmax_ps, __m128, __m128, __mmask8, __m128, __m128, 100)
+test_2 (_mm_minmax_ph, __m128h, __m128h, __m128h, 100)
+test_3 (_mm_maskz_minmax_ph, __m128h, __mmask8, __m128h, __m128h, 100)
+test_4 (_mm_mask_minmax_ph, __m128h, __m128h, __mmask8, __m128h, __m128h, 100)
+test_2x (_mm_minmax_round_sd, __m128d, __m128d, __m128d, 100, 4)
+test_3x (_mm_maskz_minmax_round_sd, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_4x (_mm_mask_minmax_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_2x (_mm_minmax_round_ss, __m128, __m128, __m128, 100, 4)
+test_3x (_mm_maskz_minmax_round_ss, __m128, __mmask8, __m128, __m128, 100, 4)
+test_4x (_mm_mask_minmax_round_ss, __m128, __m128, __mmask8, __m128, __m128, 100, 4)
+test_2x (_mm_minmax_round_sh, __m128h, __m128h, __m128h, 100, 4)
+test_3x (_mm_maskz_minmax_round_sh, __m128h, __mmask8, __m128h, __m128h, 100, 4)
+test_4x (_mm_mask_minmax_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 100, 4)
@@ -1580,3 +1580,70 @@ test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
#endif
+
+/* avx10_2-512minmaxintrin.h */
+test_2 (_mm512_minmax_nepbh, __m512bh, __m512bh, __m512bh, 100)
+test_3 (_mm512_maskz_minmax_nepbh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_4 (_mm512_mask_minmax_nepbh, __m512bh, __m512bh, __mmask32, __m512bh, __m512bh, 100)
+test_2x (_mm512_minmax_round_pd, __m512d, __m512d, __m512d, 100, 4)
+test_3x (_mm512_maskz_minmax_round_pd, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_4x (_mm512_mask_minmax_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100, 4)
+test_2x (_mm512_minmax_round_ps, __m512, __m512, __m512, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ps, __m512, __mmask16, __m512, __m512, 100, 4)
+test_4x (_mm512_mask_minmax_round_ps, __m512, __m512, __mmask16, __m512, __m512, 100, 4)
+test_2x (_mm512_minmax_round_ph, __m512h, __m512h, __m512h, 100, 4)
+test_3x (_mm512_maskz_minmax_round_ph, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_4x (_mm512_mask_minmax_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100, 4)
+test_2 (_mm512_minmax_pd, __m512d, __m512d, __m512d, 100)
+test_3 (_mm512_maskz_minmax_pd, __m512d, __mmask8, __m512d, __m512d, 100)
+test_4 (_mm512_mask_minmax_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 100)
+test_2 (_mm512_minmax_ps, __m512, __m512, __m512, 100)
+test_3 (_mm512_maskz_minmax_ps, __m512, __mmask16, __m512, __m512, 100)
+test_4 (_mm512_mask_minmax_ps, __m512, __m512, __mmask16, __m512, __m512, 100)
+test_2 (_mm512_minmax_ph, __m512h, __m512h, __m512h, 100)
+test_3 (_mm512_maskz_minmax_ph, __m512h, __mmask32, __m512h, __m512h, 100)
+test_4 (_mm512_mask_minmax_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 100)
+
+/* avx10_2minmaxintrin.h */
+test_2 (_mm256_minmax_nepbh, __m256bh, __m256bh, __m256bh, 100)
+test_3 (_mm256_maskz_minmax_nepbh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_4 (_mm256_mask_minmax_nepbh, __m256bh, __m256bh, __mmask16, __m256bh, __m256bh, 100)
+test_2x (_mm256_minmax_round_pd, __m256d, __m256d, __m256d, 100, 4)
+test_3x (_mm256_maskz_minmax_round_pd, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_4x (_mm256_mask_minmax_round_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100, 4)
+test_2x (_mm256_minmax_round_ps, __m256, __m256, __m256, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ps, __m256, __mmask8, __m256, __m256, 100, 4)
+test_4x (_mm256_mask_minmax_round_ps, __m256, __m256, __mmask8, __m256, __m256, 100, 4)
+test_2x (_mm256_minmax_round_ph, __m256h, __m256h, __m256h, 100, 4)
+test_3x (_mm256_maskz_minmax_round_ph, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_4x (_mm256_mask_minmax_round_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100, 4)
+test_2 (_mm256_minmax_pd, __m256d, __m256d, __m256d, 100)
+test_3 (_mm256_maskz_minmax_pd, __m256d, __mmask8, __m256d, __m256d, 100)
+test_4 (_mm256_mask_minmax_pd, __m256d, __m256d, __mmask8, __m256d, __m256d, 100)
+test_2 (_mm256_minmax_ps, __m256, __m256, __m256, 100)
+test_3 (_mm256_maskz_minmax_ps, __m256, __mmask8, __m256, __m256, 100)
+test_4 (_mm256_mask_minmax_ps, __m256, __m256, __mmask8, __m256, __m256, 100)
+test_2 (_mm256_minmax_ph, __m256h, __m256h, __m256h, 100)
+test_3 (_mm256_maskz_minmax_ph, __m256h, __mmask16, __m256h, __m256h, 100)
+test_4 (_mm256_mask_minmax_ph, __m256h, __m256h, __mmask16, __m256h, __m256h, 100)
+test_2 (_mm_minmax_nepbh, __m128bh, __m128bh, __m128bh, 100)
+test_3 (_mm_maskz_minmax_nepbh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_4 (_mm_mask_minmax_nepbh, __m128bh, __m128bh, __mmask8, __m128bh, __m128bh, 100)
+test_2 (_mm_minmax_pd, __m128d, __m128d, __m128d, 100)
+test_3 (_mm_maskz_minmax_pd, __m128d, __mmask8, __m128d, __m128d, 100)
+test_4 (_mm_mask_minmax_pd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100)
+test_2 (_mm_minmax_ps, __m128, __m128, __m128, 100)
+test_3 (_mm_maskz_minmax_ps, __m128, __mmask8, __m128, __m128, 100)
+test_4 (_mm_mask_minmax_ps, __m128, __m128, __mmask8, __m128, __m128, 100)
+test_2 (_mm_minmax_ph, __m128h, __m128h, __m128h, 100)
+test_3 (_mm_maskz_minmax_ph, __m128h, __mmask8, __m128h, __m128h, 100)
+test_4 (_mm_mask_minmax_ph, __m128h, __m128h, __mmask8, __m128h, __m128h, 100)
+test_2x (_mm_minmax_round_sd, __m128d, __m128d, __m128d, 100, 4)
+test_3x (_mm_maskz_minmax_round_sd, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_4x (_mm_mask_minmax_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 100, 4)
+test_2x (_mm_minmax_round_ss, __m128, __m128, __m128, 100, 4)
+test_3x (_mm_maskz_minmax_round_ss, __m128, __mmask8, __m128, __m128, 100, 4)
+test_4x (_mm_mask_minmax_round_ss, __m128, __m128, __mmask8, __m128, __m128, 100, 4)
+test_2x (_mm_minmax_round_sh, __m128h, __m128h, __m128h, 100, 4)
+test_3x (_mm_maskz_minmax_round_sh, __m128h, __mmask8, __m128h, __m128h, 100, 4)
+test_4x (_mm_mask_minmax_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 100, 4)
@@ -1063,6 +1063,25 @@
#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
#endif
+/* avx10_2-512minmaxintrin.h */
+#define __builtin_ia32_minmaxpd512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd512_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxph512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph512_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxps512_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps512_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16512_mask(A, B, C, W, U) __builtin_ia32_minmaxnepbf16512_mask (A, B, 100, W, U)
+
+/* avx10_2minmaxintrin.h */
+#define __builtin_ia32_minmaxsd_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsd_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxsh_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxsh_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxss_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxss_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxnepbf16128_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxnepbf16256_mask(A, B, C, D, E) __builtin_ia32_minmaxnepbf16256_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxpd128_mask(A, B, C, D, E) __builtin_ia32_minmaxpd128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxpd256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxpd256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxph128_mask(A, B, C, D, E) __builtin_ia32_minmaxph128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxph256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxph256_mask_round (A, B, 100, D, E, 4)
+#define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E)
+#define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4)
+
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
#include <x86intrin.h>
From: "Mo, Zewei" <zewei.mo@intel.com> gcc/ChangeLog: * config.gcc: Add avx10_2-512minmaxintrin.h and avx10_2minmaxintrin.h. * config/i386/i386-builtin-types.def: Add DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, INT, V8BF, UQI), (V16BF, V16BF, V16BF, INT, V16BF, UHI), (V32BF, V32BF, V32BF, INT, V32BF, USI), (V8HF, V8HF, V8HF, INT, V8HF, UQI), (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT), (V32HF, V32HF, V32HF, INT, V32HF, USI, INT), (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT), (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT). * config/i386/i386-builtin.def (BDESC): Add new builtins. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle V8BF_FTYPE_V8BF_V8BF_INT_V8BF_UQI, V16BF_FTYPE_V16BF_V16BF_INT_V16BF_UHI, V32BF_FTYPE_V32BF_V32BF_INT_V32BF_USI, V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI. (ix86_expand_round_builtin): Handle V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI_INT, V32HF_FTYPE_V32HF_V32HF_INT_V32HF_USI_INT, V16HF_FTYPE_V16HF_V16HF_INT_V16HF_UHI_INT, V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI_INT. * config/i386/immintrin.h: Include avx10_2-512minmaxintrin.h and avx10_2minmaxintrin.h. * config/i386/sse.md (avx10_2_vminmaxnepbf16_<mode><mask_name>): New. (avx10_2_minmaxp<mode><mask_name><round_saeonly_name>): Ditto. (avx10_2_minmaxs<mode><mask_scalar_name><round_saeonly_scalar_name>): Ditto. * config/i386/avx10_2-512minmaxintrin.h: New file. * config/i386/avx10_2minmaxintrin.h: Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Add macros. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx10-minmax-helper.h: New helper file. * gcc.target/i386/avx10_2-512-minmax-1.c: New test. * gcc.target/i386/avx10_2-512-vminmaxnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxpd-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxph-2.c: Ditto. * gcc.target/i386/avx10_2-512-vminmaxps-2.c: Ditto. * gcc.target/i386/avx10_2-minmax-1.c: Ditto. 
* gcc.target/i386/avx10_2-vminmaxnepbf16-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxsd-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxsh-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxss-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxpd-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxph-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxps-2.c: Ditto. Co-authored-by: Lin Hu <lin1.hu@intel.com> Co-authored-by: Haochen Jiang <haochen.jiang@intel.com> --- gcc/config.gcc | 3 +- gcc/config/i386/avx10_2-512minmaxintrin.h | 489 ++++++++ gcc/config/i386/avx10_2minmaxintrin.h | 1063 +++++++++++++++++ gcc/config/i386/i386-builtin-types.def | 8 + gcc/config/i386/i386-builtin.def | 16 +- gcc/config/i386/i386-expand.cc | 8 + gcc/config/i386/immintrin.h | 5 + gcc/config/i386/sse.md | 46 + gcc/testsuite/gcc.target/i386/avx-1.c | 19 + .../gcc.target/i386/avx10-minmax-helper.h | 257 ++++ .../gcc.target/i386/avx10_2-512-minmax-1.c | 51 + .../i386/avx10_2-512-vminmaxnepbf16-2.c | 35 + .../gcc.target/i386/avx10_2-512-vminmaxpd-2.c | 35 + .../gcc.target/i386/avx10_2-512-vminmaxph-2.c | 35 + .../gcc.target/i386/avx10_2-512-vminmaxps-2.c | 35 + .../gcc.target/i386/avx10_2-minmax-1.c | 122 ++ .../i386/avx10_2-vminmaxnepbf16-2.c | 13 + .../gcc.target/i386/avx10_2-vminmaxpd-2.c | 13 + .../gcc.target/i386/avx10_2-vminmaxph-2.c | 15 + .../gcc.target/i386/avx10_2-vminmaxps-2.c | 13 + .../gcc.target/i386/avx10_2-vminmaxsd-2.c | 34 + .../gcc.target/i386/avx10_2-vminmaxsh-2.c | 34 + .../gcc.target/i386/avx10_2-vminmaxss-2.c | 34 + .../gcc.target/i386/avx512f-helper.h | 2 + gcc/testsuite/gcc.target/i386/sse-13.c | 19 + gcc/testsuite/gcc.target/i386/sse-14.c | 67 ++ gcc/testsuite/gcc.target/i386/sse-22.c | 67 ++ gcc/testsuite/gcc.target/i386/sse-23.c | 19 + 28 files changed, 2555 insertions(+), 2 deletions(-) create mode 100644 gcc/config/i386/avx10_2-512minmaxintrin.h create mode 100644 gcc/config/i386/avx10_2minmaxintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx10-minmax-helper.h create mode 
100644 gcc/testsuite/gcc.target/i386/avx10_2-512-minmax-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vminmaxnepbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vminmaxpd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vminmaxph-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vminmaxps-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-minmax-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxnepbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxpd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxph-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxps-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c