@@ -573,6 +573,25 @@ (define_expand "<optab><mode><vconvert>2"
DONE;
})
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfcvt.f.xu.v (unsigned integer source)
+;; - vfcvt.f.x.v (signed integer source)
+;; -------------------------------------------------------------------------
+
+(define_expand "<float_cvt><vconvert><mode>2"
+ [(set (match_operand:VF 0 "register_operand") ; floating-point vector result
+ (any_float:VF
+ (match_operand:<VCONVERT> 1 "register_operand")))] ; integer vector source
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred (<CODE>, <MODE>mode); /* Keyed on the conversion code and the FP mode.  */
+ riscv_vector::emit_vlmax_fp_insn (icode, riscv_vector::RVV_UNOP, operands);
+ DONE; /* The insn was emitted explicitly above; nothing else is generated from the template.  */
+})
+
;; =========================================================================
;; == Unary arithmetic
;; =========================================================================
new file mode 100644
@@ -0,0 +1,96 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vfcvt-itof-template.h"
+
+/* Fill src with -88932 - 3 * i, convert it with vfcvt_<TYPE1><TYPE2> and
+   abort unless each result equals the scalar (TYPE2) conversion.  */
+#define RUN(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int idx = 0; idx < NUM; idx++) \
+ src##TYPE1##TYPE2##NUM[idx] = idx * -3 - 88932; \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int idx = 0; idx < NUM; idx++) \
+ if (dst##TYPE1##TYPE2##NUM[idx] != (TYPE2) src##TYPE1##TYPE2##NUM[idx]) \
+ __builtin_abort ();
+
+/* Fill src with 88932 + 3 * i, convert it with vfcvt_<TYPE1><TYPE2> and
+   abort unless each result equals the scalar (TYPE2) conversion.  */
+#define RUN2(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int idx = 0; idx < NUM; idx++) \
+ src##TYPE1##TYPE2##NUM[idx] = idx * 3 + 88932; \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int idx = 0; idx < NUM; idx++) \
+ if (dst##TYPE1##TYPE2##NUM[idx] != (TYPE2) src##TYPE1##TYPE2##NUM[idx]) \
+ __builtin_abort ();
+/* Check 32-bit integer -> float and 64-bit integer -> double conversions for element counts from 3 to 5975; RUN/RUN2 abort on a mismatch.  */
+int
+main ()
+{
+ RUN (int32_t, float, 3)
+ RUN (int32_t, float, 4)
+ RUN (int32_t, float, 7)
+ RUN (int32_t, float, 99)
+ RUN (int32_t, float, 119)
+ RUN (int32_t, float, 128)
+ RUN (int32_t, float, 256)
+ RUN (int32_t, float, 279)
+ RUN (int32_t, float, 555)
+ RUN (int32_t, float, 1024)
+ RUN (int32_t, float, 1389)
+ RUN (int32_t, float, 2048)
+ RUN (int32_t, float, 3989)
+ RUN (int32_t, float, 4096)
+ RUN (int32_t, float, 5975)
+ /* uint32_t -> float, using the positive inputs of RUN2.  */
+ RUN2 (uint32_t, float, 3)
+ RUN2 (uint32_t, float, 4)
+ RUN2 (uint32_t, float, 7)
+ RUN2 (uint32_t, float, 99)
+ RUN2 (uint32_t, float, 119)
+ RUN2 (uint32_t, float, 128)
+ RUN2 (uint32_t, float, 256)
+ RUN2 (uint32_t, float, 279)
+ RUN2 (uint32_t, float, 555)
+ RUN2 (uint32_t, float, 1024)
+ RUN2 (uint32_t, float, 1389)
+ RUN2 (uint32_t, float, 2048)
+ RUN2 (uint32_t, float, 3989)
+ RUN2 (uint32_t, float, 4096)
+ RUN2 (uint32_t, float, 5975)
+ /* int64_t -> double, using the negative inputs of RUN.  */
+ RUN (int64_t, double, 3)
+ RUN (int64_t, double, 4)
+ RUN (int64_t, double, 7)
+ RUN (int64_t, double, 99)
+ RUN (int64_t, double, 119)
+ RUN (int64_t, double, 128)
+ RUN (int64_t, double, 256)
+ RUN (int64_t, double, 279)
+ RUN (int64_t, double, 555)
+ RUN (int64_t, double, 1024)
+ RUN (int64_t, double, 1389)
+ RUN (int64_t, double, 2048)
+ RUN (int64_t, double, 3989)
+ RUN (int64_t, double, 4096)
+ RUN (int64_t, double, 5975)
+ /* uint64_t -> double, using the positive inputs of RUN2.  */
+ RUN2 (uint64_t, double, 3)
+ RUN2 (uint64_t, double, 4)
+ RUN2 (uint64_t, double, 7)
+ RUN2 (uint64_t, double, 99)
+ RUN2 (uint64_t, double, 119)
+ RUN2 (uint64_t, double, 128)
+ RUN2 (uint64_t, double, 256)
+ RUN2 (uint64_t, double, 279)
+ RUN2 (uint64_t, double, 555)
+ RUN2 (uint64_t, double, 1024)
+ RUN2 (uint64_t, double, 1389)
+ RUN2 (uint64_t, double, 2048)
+ RUN2 (uint64_t, double, 3989)
+ RUN2 (uint64_t, double, 4096)
+ RUN2 (uint64_t, double, 5975)
+}
new file mode 100644
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include "vfcvt-itof-template.h"
+
+/* { dg-final { scan-assembler-times {\tvfcvt\.f\.x\.v} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfcvt\.f\.xu\.v} 3 } } */
new file mode 100644
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
+
+#include "vfcvt-itof-template.h"
+
+/* { dg-final { scan-assembler-times {\tvfcvt\.f\.x\.v} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfcvt\.f\.xu\.v} 3 } } */
new file mode 100644
@@ -0,0 +1,20 @@
+#include <stdint-gcc.h>
+/* Define vfcvt_<TYPE1><TYPE2>, which converts the N TYPE1 elements of A to TYPE2 and stores them in DST.  */
+#define TEST(TYPE1, TYPE2) \
+ __attribute__ ((noipa)) \
+ void vfcvt_##TYPE1##TYPE2 (TYPE2 *restrict dst, \
+ TYPE1 *restrict a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = (TYPE2) a[i]; \
+ }
+/* Instantiate one conversion kernel per integer/floating-point type pair.  */
+#define TEST_ALL() \
+ TEST (int32_t, float) \
+ TEST (uint32_t, float) \
+ TEST (int64_t, double) \
+ TEST (uint64_t, double) \
+ TEST (int16_t, _Float16) \
+ TEST (uint16_t, _Float16)
+
+TEST_ALL ()
new file mode 100644
@@ -0,0 +1,64 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vfcvt-itof-template.h"
+
+/* Fill src with 3 * i - 8932, convert it with vfcvt_<TYPE1><TYPE2> and
+   abort unless each result equals the scalar (TYPE2) conversion.  */
+#define RUN(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int idx = 0; idx < NUM; idx++) \
+ src##TYPE1##TYPE2##NUM[idx] = idx * 3 - 8932; \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int idx = 0; idx < NUM; idx++) \
+ if (dst##TYPE1##TYPE2##NUM[idx] != (TYPE2) src##TYPE1##TYPE2##NUM[idx]) \
+ __builtin_abort ();
+
+/* Fill src with 3 * i + 8932, convert it with vfcvt_<TYPE1><TYPE2> and
+   abort unless each result equals the scalar (TYPE2) conversion.  */
+#define RUN2(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int idx = 0; idx < NUM; idx++) \
+ src##TYPE1##TYPE2##NUM[idx] = idx * 3 + 8932; \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int idx = 0; idx < NUM; idx++) \
+ if (dst##TYPE1##TYPE2##NUM[idx] != (TYPE2) src##TYPE1##TYPE2##NUM[idx]) \
+ __builtin_abort ();
+/* Check 16-bit integer -> _Float16 conversions for element counts from 3 to 5975; RUN/RUN2 abort on a mismatch.  */
+int
+main ()
+{
+ RUN (int16_t, _Float16, 3)
+ RUN (int16_t, _Float16, 4)
+ RUN (int16_t, _Float16, 7)
+ RUN (int16_t, _Float16, 99)
+ RUN (int16_t, _Float16, 119)
+ RUN (int16_t, _Float16, 128)
+ RUN (int16_t, _Float16, 256)
+ RUN (int16_t, _Float16, 279)
+ RUN (int16_t, _Float16, 555)
+ RUN (int16_t, _Float16, 1024)
+ RUN (int16_t, _Float16, 1389)
+ RUN (int16_t, _Float16, 2048)
+ RUN (int16_t, _Float16, 3989)
+ RUN (int16_t, _Float16, 4096)
+ RUN (int16_t, _Float16, 5975)
+ /* uint16_t -> _Float16, using the positive inputs of RUN2.  */
+ RUN2 (uint16_t, _Float16, 3)
+ RUN2 (uint16_t, _Float16, 4)
+ RUN2 (uint16_t, _Float16, 7)
+ RUN2 (uint16_t, _Float16, 99)
+ RUN2 (uint16_t, _Float16, 119)
+ RUN2 (uint16_t, _Float16, 128)
+ RUN2 (uint16_t, _Float16, 256)
+ RUN2 (uint16_t, _Float16, 279)
+ RUN2 (uint16_t, _Float16, 555)
+ RUN2 (uint16_t, _Float16, 1024)
+ RUN2 (uint16_t, _Float16, 1389)
+ RUN2 (uint16_t, _Float16, 2048)
+ RUN2 (uint16_t, _Float16, 3989)
+ RUN2 (uint16_t, _Float16, 4096)
+ RUN2 (uint16_t, _Float16, 5975)
+}
@@ -4,6 +4,18 @@
#include "vfcvt_rtz-template.h"
#define RUN(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int i = 0; i < NUM; i++) \
+ { \
+ src##TYPE1##TYPE2##NUM[i] = i * -3.1315926 - 88932.947289; \
+ } \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int i = 0; i < NUM; i++) \
+ if (dst##TYPE1##TYPE2##NUM[i] != (TYPE2) src##TYPE1##TYPE2##NUM[i]) \
+ __builtin_abort ();
+
+#define RUN2(TYPE1, TYPE2, NUM) \
TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
for (int i = 0; i < NUM; i++) \
@@ -34,6 +46,22 @@ main ()
RUN (float, int32_t, 4096)
RUN (float, int32_t, 5975)
+ RUN2 (float, uint32_t, 3)
+ RUN2 (float, uint32_t, 4)
+ RUN2 (float, uint32_t, 7)
+ RUN2 (float, uint32_t, 99)
+ RUN2 (float, uint32_t, 119)
+ RUN2 (float, uint32_t, 128)
+ RUN2 (float, uint32_t, 256)
+ RUN2 (float, uint32_t, 279)
+ RUN2 (float, uint32_t, 555)
+ RUN2 (float, uint32_t, 1024)
+ RUN2 (float, uint32_t, 1389)
+ RUN2 (float, uint32_t, 2048)
+ RUN2 (float, uint32_t, 3989)
+ RUN2 (float, uint32_t, 4096)
+ RUN2 (float, uint32_t, 5975)
+
RUN (double, int64_t, 3)
RUN (double, int64_t, 4)
RUN (double, int64_t, 7)
@@ -49,4 +77,20 @@ main ()
RUN (double, int64_t, 3989)
RUN (double, int64_t, 4096)
RUN (double, int64_t, 5975)
+
+ RUN2 (double, uint64_t, 3)
+ RUN2 (double, uint64_t, 4)
+ RUN2 (double, uint64_t, 7)
+ RUN2 (double, uint64_t, 99)
+ RUN2 (double, uint64_t, 119)
+ RUN2 (double, uint64_t, 128)
+ RUN2 (double, uint64_t, 256)
+ RUN2 (double, uint64_t, 279)
+ RUN2 (double, uint64_t, 555)
+ RUN2 (double, uint64_t, 1024)
+ RUN2 (double, uint64_t, 1389)
+ RUN2 (double, uint64_t, 2048)
+ RUN2 (double, uint64_t, 3989)
+ RUN2 (double, uint64_t, 4096)
+ RUN2 (double, uint64_t, 5975)
}
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
#include "vfcvt_rtz-template.h"
-/* { dg-final { scan-assembler-times {\tvfcvt\.rtz} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfcvt\.rtz\.x\.f\.v} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfcvt\.rtz\.xu\.f\.v} 3 } } */
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
#include "vfcvt_rtz-template.h"
-/* { dg-final { scan-assembler-times {\tvfcvt\.rtz} 2 } } */
+/* { dg-final { scan-assembler-times {\tvfcvt\.rtz\.x\.f\.v} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfcvt\.rtz\.xu\.f\.v} 3 } } */
@@ -10,6 +10,10 @@
#define TEST_ALL() \
TEST (float, int32_t) \
- TEST (double, int64_t)
+ TEST (float, uint32_t) \
+ TEST (double, int64_t) \
+ TEST (double, uint64_t) \
+ TEST (_Float16, int16_t) \
+ TEST (_Float16, uint16_t) /* Last entry: no continuation, so the macro ends on this line.  */
TEST_ALL ()
new file mode 100644
@@ -0,0 +1,64 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -march=rv64gcv_zvfh -mabi=lp64d -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vfcvt_rtz-template.h"
+
+/* Fill src with i * -3.1315926 - 8932.947289, convert it with
+   vfcvt_<TYPE1><TYPE2> and abort unless each result equals the scalar (TYPE2) conversion.  */
+#define RUN(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int idx = 0; idx < NUM; idx++) \
+ src##TYPE1##TYPE2##NUM[idx] = idx * -3.1315926 - 8932.947289; \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int idx = 0; idx < NUM; idx++) \
+ if (dst##TYPE1##TYPE2##NUM[idx] != (TYPE2) src##TYPE1##TYPE2##NUM[idx]) \
+ __builtin_abort ();
+
+/* Fill src with i * 3.1315926 + 8932.947289, convert it with
+   vfcvt_<TYPE1><TYPE2> and abort unless each result equals the scalar (TYPE2) conversion.  */
+#define RUN2(TYPE1, TYPE2, NUM) \
+ TYPE1 src##TYPE1##TYPE2##NUM[NUM]; \
+ TYPE2 dst##TYPE1##TYPE2##NUM[NUM]; \
+ for (int idx = 0; idx < NUM; idx++) \
+ src##TYPE1##TYPE2##NUM[idx] = idx * 3.1315926 + 8932.947289; \
+ vfcvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2##NUM, src##TYPE1##TYPE2##NUM, NUM); \
+ for (int idx = 0; idx < NUM; idx++) \
+ if (dst##TYPE1##TYPE2##NUM[idx] != (TYPE2) src##TYPE1##TYPE2##NUM[idx]) \
+ __builtin_abort ();
+/* Check _Float16 -> 16-bit integer conversions for element counts from 3 to 5975; RUN/RUN2 abort on a mismatch.  */
+int
+main ()
+{
+ RUN (_Float16, int16_t, 3)
+ RUN (_Float16, int16_t, 4)
+ RUN (_Float16, int16_t, 7)
+ RUN (_Float16, int16_t, 99)
+ RUN (_Float16, int16_t, 119)
+ RUN (_Float16, int16_t, 128)
+ RUN (_Float16, int16_t, 256)
+ RUN (_Float16, int16_t, 279)
+ RUN (_Float16, int16_t, 555)
+ RUN (_Float16, int16_t, 1024)
+ RUN (_Float16, int16_t, 1389)
+ RUN (_Float16, int16_t, 2048)
+ RUN (_Float16, int16_t, 3989)
+ RUN (_Float16, int16_t, 4096)
+ RUN (_Float16, int16_t, 5975)
+ /* _Float16 -> uint16_t, using the positive inputs of RUN2.  */
+ RUN2 (_Float16, uint16_t, 3)
+ RUN2 (_Float16, uint16_t, 4)
+ RUN2 (_Float16, uint16_t, 7)
+ RUN2 (_Float16, uint16_t, 99)
+ RUN2 (_Float16, uint16_t, 119)
+ RUN2 (_Float16, uint16_t, 128)
+ RUN2 (_Float16, uint16_t, 256)
+ RUN2 (_Float16, uint16_t, 279)
+ RUN2 (_Float16, uint16_t, 555)
+ RUN2 (_Float16, uint16_t, 1024)
+ RUN2 (_Float16, uint16_t, 1389)
+ RUN2 (_Float16, uint16_t, 2048)
+ RUN2 (_Float16, uint16_t, 3989)
+ RUN2 (_Float16, uint16_t, 4096)
+ RUN2 (_Float16, uint16_t, 5975)
+}
@@ -1,5 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv_zvfhmin -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+/* { dg-options "-march=rv32gcv_zvfhmin -mabi=ilp32d --param riscv-autovec-preference=scalable -ffast-math -fdump-rtl-final" } */
+
+#include <stdint-gcc.h>
void f0 (_Float16 * __restrict a, _Float16 * __restrict b, int n)
{
@@ -40,10 +42,44 @@ void f6 (_Float16 * __restrict a, _Float16 * __restrict b, int n)
void f7 (_Float16 * __restrict a, _Float16 * __restrict b, int n)
{
for (int i = 0; i < n; i++)
- a[i] = __builtin_sqrtf (b[i]);
+ a[i] = __builtin_sqrtf16 (b[i]); /* _Float16 form of the sqrt builtin.  */
+}
+/* Signed 16-bit integer -> _Float16: store each element of b, converted, into a.  */
+void f8 (_Float16 * __restrict a, int16_t * __restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = (_Float16) (b[i]);
+}
+/* Unsigned 16-bit integer -> _Float16: store each element of b, converted, into a.  */
+void f9 (_Float16 * __restrict a, uint16_t * __restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = (_Float16) (b[i]);
+}
+/* _Float16 -> signed 16-bit integer: store each element of b, converted, into a.  */
+void f10 (int16_t * __restrict a, _Float16 * __restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = (int16_t) (b[i]);
+}
+/* _Float16 -> unsigned 16-bit integer: store each element of b, converted, into a.  */
+void f11 (uint16_t * __restrict a, _Float16 * __restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = (uint16_t) (b[i]);
}
-/* We can't enable FP16 NEG/PLUS/MINUS/MULT/DIV/ABS/SQRTF auto-vectorization
- when -march="*zvfhmin*" because the min variant of the extension only
- provides loads, stores and conversions. */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* With -march="*zvfhmin*" we cannot directly auto-vectorize FP16 NEG/PLUS/
+ MINUS/MULT/DIV/ABS/SQRT or FP16<->integer conversions, because the min variant
+ of the extension only provides FP16 loads, stores and FP16<->FP32 conversions.
+ The loops may still be vectorized after promotion to float, so we need to
+ make sure that no vector operation in an HF mode is generated. */
+/* { dg-final { scan-rtl-dump-not "plus:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "minus:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "mult:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "div:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "neg:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "abs:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "sqrt:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "float:VNx\[0-9\]+HF" "final" } } */
+/* { dg-final { scan-rtl-dump-not "fix:VNx\[0-9\]+HI\\s*.+reg:VNx\[0-9\]+HF" "final" } } */