@@ -754,3 +754,39 @@ (define_expand "vec_init<vi_half><mode>"
{
DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Binary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfadd.vv/vfsub.vv/...
+;; - vfadd.vf/vfsub.vf/...
+;; -------------------------------------------------------------------------
+(define_expand "<optab><mode>3"
+ [(match_operand:VF 0 "register_operand")
+ (any_float_binop:VF
+ (match_operand:VF 1 "register_operand")
+ (match_operand:VF 2 "register_operand"))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::emit_vlmax_fp_insn (code_for_pred (<CODE>, <MODE>mode),
+ riscv_vector::RVV_BINOP, operands);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfmin.vv/vfmax.vv
+;; - vfmin.vf/vfmax.vf
+;; -------------------------------------------------------------------------
+(define_expand "<optab><mode>3"
+ [(match_operand:VF 0 "register_operand")
+ (any_float_binop_nofrm:VF
+ (match_operand:VF 1 "register_operand")
+ (match_operand:VF 2 "register_operand"))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::emit_vlmax_fp_minmax_insn (code_for_pred (<CODE>, <MODE>mode),
+ riscv_vector::RVV_BINOP, operands);
+ DONE;
+})
@@ -185,6 +185,8 @@ bool legitimize_move (rtx, rtx);
void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
+void emit_vlmax_fp_insn (unsigned, int, rtx *, rtx = 0);
+void emit_vlmax_fp_minmax_insn (unsigned, int, rtx *, rtx = 0);
void emit_vlmax_ternary_insn (unsigned, int, rtx *, rtx = 0);
void emit_nonvlmax_insn (unsigned, int, rtx *, rtx);
void emit_vlmax_slide_insn (unsigned, rtx *);
@@ -260,11 +262,12 @@ enum vxrm_field_enum
VXRM_RDN,
VXRM_ROD
};
+
/* Rounding mode bitfield for floating point FRM. The value of enum comes
from the below link.
https://github.com/riscv/riscv-isa-manual/blob/main/src/f-st-ext.adoc#floating-point-control-and-status-register
*/
-enum frm_field_enum
+enum rounding_mode
{
FRM_RNE = 0, /* Aka 0b000. */
FRM_RTZ = 1, /* Aka 0b001. */
@@ -74,8 +74,10 @@ public:
: m_opno (0), m_op_num (0), m_has_dest_p (false),
m_fully_unmasked_p (false), m_use_real_merge_p (false),
m_needs_avl_p (false), m_vlmax_p (false), m_has_tail_policy_p (false),
- m_has_mask_policy_p (false), m_tail_policy (TAIL_ANY),
- m_mask_policy (MASK_ANY), m_dest_mode (VOIDmode), m_mask_mode (VOIDmode),
+ m_has_mask_policy_p (false), m_has_fp_rounding_mode_p (false),
+ m_tail_policy (TAIL_ANY), m_mask_policy (MASK_ANY),
+ m_fp_rounding_mode (FRM_DYN),
+ m_dest_mode (VOIDmode), m_mask_mode (VOIDmode),
m_vl_op (NULL_RTX)
{}
@@ -87,8 +89,10 @@ public:
m_fully_unmasked_p (use_all_trues_mask_p),
m_use_real_merge_p (use_real_merge_p), m_needs_avl_p (needs_avl_p),
m_vlmax_p (vlmax_p), m_has_tail_policy_p (false),
- m_has_mask_policy_p (false), m_tail_policy (TAIL_ANY),
- m_mask_policy (MASK_ANY), m_dest_mode (dest_mode),
+ m_has_mask_policy_p (false), m_has_fp_rounding_mode_p (false),
+ m_tail_policy (TAIL_ANY), m_mask_policy (MASK_ANY),
+ m_fp_rounding_mode (FRM_DYN),
+ m_dest_mode (dest_mode),
m_mask_mode (mask_mode), m_vl_op (NULL_RTX)
{}
@@ -104,6 +108,12 @@ public:
}
void set_vl (rtx vl) { m_vl_op = vl; }
+ void set_rounding_mode (enum rounding_mode mode)
+ {
+ m_has_fp_rounding_mode_p = true;
+ m_fp_rounding_mode = mode;
+ }
+
void add_output_operand (rtx x, machine_mode mode)
{
create_output_operand (&m_ops[m_opno++], x, mode);
@@ -140,6 +150,15 @@ public:
add_input_operand (gen_int_mode (type, Pmode), Pmode);
}
+ void add_rounding_mode_operand ()
+ {
+ if (m_has_fp_rounding_mode_p)
+ {
+ rtx frm_rtx = gen_int_mode (m_fp_rounding_mode, Pmode);
+ add_input_operand (frm_rtx, Pmode);
+ }
+ }
+
void emit_insn (enum insn_code icode, rtx *ops)
{
int opno = 0;
@@ -200,6 +219,9 @@ public:
add_policy_operand ();
if (m_needs_avl_p)
add_avl_type_operand (m_vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+
+ add_rounding_mode_operand ();
+
expand (icode, any_mem_p);
}
@@ -231,8 +253,10 @@ private:
bool m_vlmax_p;
bool m_has_tail_policy_p;
bool m_has_mask_policy_p;
+ bool m_has_fp_rounding_mode_p;
enum tail_policy m_tail_policy;
enum mask_policy m_mask_policy;
+ enum rounding_mode m_fp_rounding_mode;
machine_mode m_dest_mode;
machine_mode m_mask_mode;
rtx m_vl_op;
@@ -643,6 +667,50 @@ emit_vlmax_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
e.emit_insn ((enum insn_code) icode, ops);
}
+void
+emit_vlmax_fp_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
+{
+ machine_mode dest_mode = GET_MODE (ops[0]);
+ machine_mode mask_mode = get_mask_mode (dest_mode).require ();
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (op_num,
+ /* HAS_DEST_P */ true,
+ /* FULLY_UNMASKED_P */ true,
+ /* USE_REAL_MERGE_P */ false,
+ /* HAS_AVL_P */ true,
+ /* VLMAX_P */ true,
+ dest_mode,
+ mask_mode);
+
+ e.set_policy (TAIL_ANY);
+ e.set_policy (MASK_ANY);
+ e.set_rounding_mode (FRM_DYN);
+ e.set_vl (vl);
+ e.emit_insn ((enum insn_code) icode, ops);
+}
+
+void
+emit_vlmax_fp_minmax_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
+{
+ machine_mode dest_mode = GET_MODE (ops[0]);
+ machine_mode mask_mode = get_mask_mode (dest_mode).require ();
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (op_num,
+ /* HAS_DEST_P */ true,
+ /* FULLY_UNMASKED_P */ true,
+ /* USE_REAL_MERGE_P */ false,
+ /* HAS_AVL_P */ true,
+ /* VLMAX_P */ true,
+ dest_mode,
+ mask_mode);
+
+ e.set_policy (TAIL_ANY);
+ e.set_policy (MASK_ANY);
+
+ e.set_vl (vl);
+ e.emit_insn ((enum insn_code) icode, ops);
+}
+
/* This function emits a {VLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
* ternary operation which always has a real merge operand. */
void
@@ -1315,11 +1315,22 @@ riscv_const_insns (rtx x)
if (satisfies_constraint_vi (x))
return 1;
- /* A const duplicate vector can always be broadcast from
- a general-purpose register. This means we need as many
- insns as it takes to load the constant into the GPR
- and one vmv.v.x. */
- return 1 + riscv_const_insns (elt);
+ /* Any int/FP constants can always be broadcast from a
+ scalar register. Loading of a floating-point
+ constant incurs a literal-pool access. Allow this in
+ order to increase vectorization possibilities. */
+ int n = riscv_const_insns (elt);
+ if (CONST_DOUBLE_P (elt))
+ return 1 + 4; /* vfmv.v.f + memory access. */
+ else
+ {
+ /* We need as many insns as it takes to load the constant
+ into a GPR and one vmv.v.x. */
+ if (n != 0)
+ return 1 + n;
+ else
+ return 1 + 4; /* vmv.v.x + memory access. */
+ }
}
}
@@ -7169,8 +7180,8 @@ riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
precision of the _FloatN type; evaluate all other operations and
constants to the range and precision of the semantic type;
- If we have the zfh/zhinx extensions then we support _Float16 in native
- precision, so we should set this to 16. */
+ If we have the zfh/zhinx/zvfh extensions then we support _Float16
+ in native precision, so we should set this to 16. */
static enum flt_eval_method
riscv_excess_precision (enum excess_precision_type type)
{
@@ -7178,7 +7189,7 @@ riscv_excess_precision (enum excess_precision_type type)
{
case EXCESS_PRECISION_TYPE_FAST:
case EXCESS_PRECISION_TYPE_STANDARD:
- return ((TARGET_ZFH || TARGET_ZHINX)
+ return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
case EXCESS_PRECISION_TYPE_IMPLICIT:
@@ -51,7 +51,9 @@
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
- RUN(uint64_t, 6) \
+ RUN(uint64_t, 6) \
+ RUN(float, -5) \
+ RUN(double, 6) \
RUN2(int8_t, -7) \
RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
@@ -59,7 +61,9 @@
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
- RUN2(uint64_t, 12) \
+ RUN2(uint64_t, 12) \
+ RUN2(float, -11) \
+ RUN2(double, 12) \
RUN3M(int8_t, 13) \
RUN3(uint8_t, 14) \
RUN3M(int16_t, 13) \
@@ -67,7 +71,9 @@
RUN3M(int32_t, 15) \
RUN3(uint32_t, 16) \
RUN3M(int64_t, 17) \
- RUN3(uint64_t, 18)
+ RUN3(uint64_t, 18) \
+ RUN3(float, 17) \
+ RUN3M(double, 18) \
int main ()
{
@@ -1,7 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vadd-template.h"
/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfadd\.vv} 9 } } */
@@ -1,7 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vadd-template.h"
/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfadd\.vv} 9 } } */
@@ -41,6 +41,9 @@
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST_TYPE(_Float16) \
+ TEST_TYPE(float) \
+ TEST_TYPE(double) \
TEST2_TYPE(int8_t) \
TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
@@ -49,6 +52,9 @@
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t) \
+ TEST2_TYPE(_Float16) \
+ TEST2_TYPE(float) \
+ TEST2_TYPE(double) \
TEST3M_TYPE(int8_t) \
TEST3_TYPE(uint8_t) \
TEST3M_TYPE(int16_t) \
@@ -56,6 +62,9 @@
TEST3M_TYPE(int32_t) \
TEST3_TYPE(uint32_t) \
TEST3M_TYPE(int64_t) \
- TEST3_TYPE(uint64_t)
+ TEST3_TYPE(uint64_t) \
+ TEST3M_TYPE(_Float16) \
+ TEST3_TYPE(float) \
+ TEST3M_TYPE(double) \
TEST_ALL()
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do run { target { riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vadd-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define RUN(TYPE,VAL) \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = 0; \
+ b##TYPE[i] = VAL; \
+ } \
+ vadd_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (a##TYPE[i] == VAL);
+
+#define RUN2(TYPE,VAL) \
+ TYPE as##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as##TYPE[i] = 0; \
+ vadds_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as##TYPE[i] == VAL);
+
+#define RUN3(TYPE,VAL) \
+ TYPE ai##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ ai##TYPE[i] = VAL; \
+ vaddi_##TYPE (ai##TYPE, ai##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (ai##TYPE[i] == VAL + 15);
+
+#define RUN3M(TYPE,VAL) \
+ TYPE aim##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ aim##TYPE[i] = VAL; \
+ vaddim_##TYPE (aim##TYPE, aim##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (aim##TYPE[i] == VAL - 16);
+
+#define RUN_ALL() \
+ RUN(_Float16, 4) \
+ RUN2(_Float16, 10) \
+ RUN3M(_Float16, 17) \
+
+int main ()
+{
+ RUN_ALL()
+}
@@ -1,5 +1,5 @@
/* { dg-do run { target { riscv_vector_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vdiv-template.h"
@@ -36,6 +36,8 @@
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN(float, -5) \
+ RUN(double, 6) \
RUN2(int8_t, -7) \
RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
@@ -43,7 +45,9 @@
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
- RUN2(uint64_t, 12)
+ RUN2(uint64_t, 12) \
+ RUN2(float, -11) \
+ RUN2(double, 12) \
int main ()
{
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vdiv-template.h"
@@ -8,3 +8,8 @@
/* { dg-final { scan-assembler-times {\tvdiv\.vv} 14 } } */
/* { dg-final { scan-assembler-times {\tvdivu\.vv} 14 } } */
+
+/* Division by constant is done by calculating a reciprocal and
+ then multiplying. Hence we do not expect 6 vfdivs. */
+/* { dg-final { scan-assembler-times {\tvfdiv\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 3 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vdiv-template.h"
@@ -8,3 +8,8 @@
/* { dg-final { scan-assembler-times {\tvdiv\.vv} 14 } } */
/* { dg-final { scan-assembler-times {\tvdivu\.vv} 14 } } */
+
+/* Division by constant is done by calculating a reciprocal and
+ then multiplying. Hence we do not expect 6 vfdivs. */
+/* { dg-final { scan-assembler-times {\tvfdiv\.vv} 3 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 3 } } */
@@ -25,6 +25,9 @@
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST_TYPE(_Float16) \
+ TEST_TYPE(float) \
+ TEST_TYPE(double) \
TEST2_TYPE(int8_t) \
TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
@@ -32,6 +35,9 @@
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
- TEST2_TYPE(uint64_t)
+ TEST2_TYPE(uint64_t) \
+ TEST2_TYPE(_Float16) \
+ TEST2_TYPE(float) \
+ TEST2_TYPE(double) \
TEST_ALL()
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run { target { riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "vdiv-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define RUN(TYPE,VAL) \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = VAL * 3; \
+ b##TYPE[i] = VAL; \
+ } \
+ vdiv_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (a##TYPE[i] == 3);
+
+#define RUN2(TYPE,VAL) \
+ TYPE as##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as##TYPE[i] = VAL * 5; \
+ vdivs_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as##TYPE[i] == 5);
+
+#define RUN_ALL() \
+ RUN(_Float16, 4) \
+ RUN2(_Float16, 10) \
+
+int main ()
+{
+ RUN_ALL()
+}
@@ -1,5 +1,5 @@
/* { dg-do run { target { riscv_vector_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmax-template.h"
@@ -27,6 +27,7 @@
for (int i = 0; i < SZ; i++) \
assert (as##TYPE[i] == 0 > VAL ? 0 : VAL);
+
#define RUN_ALL() \
RUN(int8_t, -1) \
RUN(uint8_t, 2) \
@@ -36,6 +37,8 @@
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN(float, -5) \
+ RUN(double, 6) \
RUN2(int8_t, -7) \
RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
@@ -43,7 +46,9 @@
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
- RUN2(uint64_t, 12)
+ RUN2(uint64_t, 12) \
+ RUN2(float, -11) \
+ RUN2(double, 12) \
int main ()
{
@@ -1,7 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmax-template.h"
/* { dg-final { scan-assembler-times {\tvmax\.vv} 8 } } */
/* { dg-final { scan-assembler-times {\tvmaxu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfmax\.vv} 6 } } */
@@ -1,7 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmax-template.h"
/* { dg-final { scan-assembler-times {\tvmax\.vv} 8 } } */
/* { dg-final { scan-assembler-times {\tvmaxu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfmax\.vv} 6 } } */
@@ -25,6 +25,9 @@
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST_TYPE(_Float16) \
+ TEST_TYPE(float) \
+ TEST_TYPE(double) \
TEST2_TYPE(int8_t) \
TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
@@ -32,6 +35,9 @@
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
- TEST2_TYPE(uint64_t)
+ TEST2_TYPE(uint64_t) \
+ TEST2_TYPE(_Float16) \
+ TEST2_TYPE(float) \
+ TEST2_TYPE(double) \
TEST_ALL()
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do run { target { riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "vmax-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+/* Run vmax_TYPE on (0, VAL) inputs; each element must equal max (0, VAL). */
+#define RUN(TYPE,VAL) \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = 0; \
+ b##TYPE[i] = VAL; \
+ } \
+ vmax_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (a##TYPE[i] == (0 > VAL ? 0 : VAL));
+/* Run vmaxs_TYPE with scalar VAL on zeros; expect max (0, VAL) per element. */
+#define RUN2(TYPE,VAL) \
+ TYPE as##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as##TYPE[i] = 0; \
+ vmaxs_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as##TYPE[i] == (0 > VAL ? 0 : VAL));
+
+
+#define RUN_ALL() \
+ RUN(_Float16, 4) \
+ RUN2(_Float16, 10) \
+
+int main ()
+{
+ RUN_ALL()
+}
@@ -1,5 +1,5 @@
/* { dg-do run { target { riscv_vector_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmin-template.h"
@@ -35,7 +35,9 @@
RUN(int32_t, -3) \
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
- RUN(uint64_t, 6) \
+ RUN(uint64_t, 6) \
+ RUN(float, -5) \
+ RUN(double, 6) \
RUN2(int8_t, -7) \
RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
@@ -43,7 +45,9 @@
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
- RUN2(uint64_t, 12)
+ RUN2(uint64_t, 12) \
+ RUN2(float, -11) \
+ RUN2(double, 12) \
int main ()
{
@@ -1,7 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmin-template.h"
/* { dg-final { scan-assembler-times {\tvmin\.vv} 8 } } */
/* { dg-final { scan-assembler-times {\tvminu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfmin\.vv} 6 } } */
@@ -1,7 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmin-template.h"
/* { dg-final { scan-assembler-times {\tvmin\.vv} 8 } } */
/* { dg-final { scan-assembler-times {\tvminu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfmin\.vv} 6 } } */
@@ -25,6 +25,9 @@
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST_TYPE(_Float16) \
+ TEST_TYPE(float) \
+ TEST_TYPE(double) \
TEST2_TYPE(int8_t) \
TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
@@ -32,6 +35,9 @@
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
- TEST2_TYPE(uint64_t)
+ TEST2_TYPE(uint64_t) \
+ TEST2_TYPE(_Float16) \
+ TEST2_TYPE(float) \
+ TEST2_TYPE(double) \
TEST_ALL()
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run { target { riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "vmin-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+/* Run vmin_TYPE on (0, VAL) inputs; each element must equal min (0, VAL). */
+#define RUN(TYPE,VAL) \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = 0; \
+ b##TYPE[i] = VAL; \
+ } \
+ vmin_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (a##TYPE[i] == (0 < VAL ? 0 : VAL));
+/* Run vmins_TYPE with scalar VAL on zeros; expect min (0, VAL) per element. */
+#define RUN2(TYPE,VAL) \
+ TYPE as##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as##TYPE[i] = 0; \
+ vmins_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as##TYPE[i] == (0 < VAL ? 0 : VAL));
+
+#define RUN_ALL() \
+ RUN(_Float16, 4) \
+ RUN2(_Float16, 10) \
+
+int main ()
+{
+ RUN_ALL()
+}
@@ -1,5 +1,5 @@
/* { dg-do run { target { riscv_vector_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmul-template.h"
@@ -36,6 +36,8 @@
RUN(uint32_t, 4) \
RUN(int64_t, -5) \
RUN(uint64_t, 6) \
+ RUN(float, -5) \
+ RUN(double, 6) \
RUN2(int8_t, -7) \
RUN2(uint8_t, 8) \
RUN2(int16_t, -7) \
@@ -43,7 +45,9 @@
RUN2(int32_t, -9) \
RUN2(uint32_t, 10) \
RUN2(int64_t, -11) \
- RUN2(uint64_t, 12)
+ RUN2(uint64_t, 12) \
+ RUN2(float, -11) \
+ RUN2(double, 12) \
int main ()
{
@@ -1,11 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmul-template.h"
-<<<<<<< HEAD
-/* { dg-final { scan-assembler-times {\tvmul\.vv} 14 } } */
-=======
/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
->>>>>>> 1004a8ccd52 (fix uint8)
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
@@ -1,11 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vmul-template.h"
-<<<<<<< HEAD
-/* { dg-final { scan-assembler-times {\tvmul\.vv} 14 } } */
-=======
/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
->>>>>>> 1004a8ccd52 (fix uint8)
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
@@ -25,6 +25,9 @@
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
+ TEST_TYPE(_Float16) \
+ TEST_TYPE(float) \
+ TEST_TYPE(double) \
TEST2_TYPE(int8_t) \
TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
@@ -32,6 +35,9 @@
TEST2_TYPE(int32_t) \
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
- TEST2_TYPE(uint64_t)
+ TEST2_TYPE(uint64_t) \
+ TEST2_TYPE(_Float16) \
+ TEST2_TYPE(float) \
+ TEST2_TYPE(double) \
TEST_ALL()
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-do run { target { riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "vmul-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+/* Run vmul_TYPE on (2, VAL) inputs; each element must equal 2 * VAL. */
+#define RUN(TYPE,VAL) \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = 2; \
+ b##TYPE[i] = VAL; \
+ } \
+ vmul_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (a##TYPE[i] == 2 * VAL);
+/* Run vmuls_TYPE with scalar VAL on threes; expect 3 * VAL per element. */
+#define RUN2(TYPE,VAL) \
+ TYPE as##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as##TYPE[i] = 3; \
+ vmuls_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as##TYPE[i] == 3 * VAL);
+
+#define RUN_ALL() \
+ RUN(_Float16, 4) \
+ RUN2(_Float16, 10) \
+
+int main ()
+{
+ RUN_ALL()
+}
@@ -1,5 +1,4 @@
-/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vrem-template.h"
@@ -1,5 +1,5 @@
/* { dg-do run { target { riscv_vector_hw } } } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vsub-template.h"
@@ -52,6 +52,8 @@
RUN(uint32_t, 4) \
RUN(int64_t, 5) \
RUN(uint64_t, 6) \
+ RUN(float, 5) \
+ RUN(double, 6) \
RUN2(int8_t, 7) \
RUN2(uint8_t, 8) \
RUN2(int16_t, 7) \
@@ -60,6 +62,8 @@
RUN2(uint32_t, 10) \
RUN2(int64_t, 11) \
RUN2(uint64_t, 12) \
+ RUN2(float, 11) \
+ RUN2(double, 12) \
RUN3(int8_t) \
RUN3(uint8_t) \
RUN3(int16_t) \
@@ -68,6 +72,8 @@
RUN3(uint32_t) \
RUN3(int64_t) \
RUN3(uint64_t) \
+ RUN3(float) \
+ RUN3(double) \
RUN4(int8_t) \
RUN4(uint8_t) \
RUN4(int16_t) \
@@ -75,7 +81,9 @@
RUN4(int32_t) \
RUN4(uint32_t) \
RUN4(int64_t) \
- RUN4(uint64_t)
+ RUN4(uint64_t) \
+ RUN4(float) \
+ RUN4(double) \
int main ()
{
@@ -1,7 +1,13 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vsub-template.h"
/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
+
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 12 } } */
+
+/* Do not expect vfrsub for now, because we do not properly
+ handle vop.vx and vfop.vf yet. */
+/* { dg-final { scan-assembler-times {\tvfrsub\.vf} 0 } } */
@@ -1,7 +1,13 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
#include "vsub-template.h"
/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
+
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 12 } } */
+
+/* Do not expect vfrsub for now, because we do not properly
+ handle vop.vx and vfop.vf yet. */
+/* { dg-final { scan-assembler-times {\tvfrsub\.vf} 0 } } */
@@ -41,8 +41,11 @@
TEST_TYPE(uint32_t) \
TEST_TYPE(int64_t) \
TEST_TYPE(uint64_t) \
- TEST2_TYPE(int8_t) \
+ TEST_TYPE(_Float16) \
+ TEST_TYPE(float) \
+ TEST_TYPE(double) \
+ TEST2_TYPE(int8_t) \
TEST2_TYPE(uint8_t) \
TEST2_TYPE(int16_t) \
TEST2_TYPE(uint16_t) \
@@ -50,6 +53,9 @@
TEST2_TYPE(uint32_t) \
TEST2_TYPE(int64_t) \
TEST2_TYPE(uint64_t)
+ TEST2_TYPE(_Float16) \
+ TEST2_TYPE(float) \
+ TEST2_TYPE(double) \
TEST3_TYPE(int8_t) \
TEST3_TYPE(uint8_t) \
@@ -59,6 +65,9 @@
TEST3_TYPE(uint32_t) \
TEST3_TYPE(int64_t) \
TEST3_TYPE(uint64_t) \
+ TEST3_TYPE(_Float16) \
+ TEST3_TYPE(float) \
+ TEST3_TYPE(double) \
TEST4_TYPE(int8_t) \
TEST4_TYPE(uint8_t) \
@@ -67,6 +76,9 @@
TEST4_TYPE(int32_t) \
TEST4_TYPE(uint32_t) \
TEST4_TYPE(int64_t) \
- TEST4_TYPE(uint64_t)
+ TEST4_TYPE(uint64_t) \
+ TEST4_TYPE(_Float16) \
+ TEST4_TYPE(float) \
+ TEST4_TYPE(double) \
TEST_ALL()
new file mode 100644
@@ -0,0 +1,55 @@
+/* { dg-do run { target { riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "vsub-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define RUN(TYPE,VAL) \
+ TYPE a##TYPE[SZ]; \
+ TYPE b##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ a##TYPE[i] = 999; \
+ b##TYPE[i] = VAL; \
+ } \
+ vsub_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (a##TYPE[i] == 999 - VAL);
+
+#define RUN2(TYPE,VAL) \
+ TYPE as##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as##TYPE[i] = 999; \
+ vsubs_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as##TYPE[i] == 999 - VAL);
+
+#define RUN3(TYPE) \
+ TYPE as2##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as2##TYPE[i] = i * 33 - 779; \
+ vsubi_##TYPE (as2##TYPE, as2##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as2##TYPE[i] == (TYPE)(-16 - (i * 33 - 779)));
+
+#define RUN4(TYPE) \
+ TYPE as3##TYPE[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ as3##TYPE[i] = i * -17 + 667; \
+ vsubi2_##TYPE (as3##TYPE, as3##TYPE, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (as3##TYPE[i] == (TYPE)(15 - (i * -17 + 667)));
+
+#define RUN_ALL() \
+ RUN(_Float16, 4) \
+ RUN2(_Float16, 10) \
+ RUN3(_Float16) \
+ RUN4(_Float16) \
+
+int main ()
+{
+ RUN_ALL()
+}