@@ -27,12 +27,6 @@ (define_predicate "arith_operand"
(ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
-(define_predicate "arith_operand_or_mode_mask"
- (ior (match_operand 0 "arith_operand")
- (and (match_code "const_int")
- (match_test "UINTVAL (op) == GET_MODE_MASK (HImode)
- || UINTVAL (op) == GET_MODE_MASK (SImode)"))))
-
(define_predicate "lui_operand"
(and (match_code "const_int")
(match_test "LUI_OPERAND (INTVAL (op))")))
@@ -398,6 +392,14 @@ (define_predicate "not_single_bit_mask_operand"
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
+(define_predicate "arith_or_mode_mask_or_zbs_operand"
+ (ior (match_operand 0 "arith_operand")
+ (and (match_test "TARGET_ZBS")
+ (match_operand 0 "not_single_bit_mask_operand"))
+ (and (match_code "const_int")
+ (match_test "UINTVAL (op) == GET_MODE_MASK (HImode)
+ || UINTVAL (op) == GET_MODE_MASK (SImode)"))))
+
(define_predicate "const_si_mask_operand"
(and (match_code "const_int")
(match_test "(INTVAL (op) & (GET_MODE_BITSIZE (SImode) - 1))
@@ -893,6 +893,40 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
codes[1].use_uw = false;
cost = 2;
}
+
+ /* If LUI/ADDI are going to set bits 32..63 and we need a small
+ number of them cleared, we might be able to use bclri profitably.
+
+ Note we may allow clearing of bit 31 using bclri. There's a class
+ of constants with that bit clear where this helps. */
+ else if (TARGET_64BIT
+ && TARGET_ZBS
+ && (32 - popcount_hwi (value & HOST_WIDE_INT_C (0xffffffff80000000))) + 1 < cost)
+ {
+ /* Turn on all those upper bits and synthesize the result. */
+ HOST_WIDE_INT nval = value | HOST_WIDE_INT_C (0xffffffff80000000);
+ alt_cost = riscv_build_integer_1 (alt_codes, nval, mode);
+
+ /* Now iterate over the bits we want to clear until the cost is
+ too high or we're done. */
+ nval = value ^ HOST_WIDE_INT_C (-1);
+ nval &= HOST_WIDE_INT_C (~0x7fffffff);
+ while (nval && alt_cost < cost)
+ {
+ HOST_WIDE_INT bit = ctz_hwi (nval);
+ alt_codes[alt_cost].code = AND;
+ alt_codes[alt_cost].value = ~(1UL << bit);
+ alt_codes[alt_cost].use_uw = false;
+ alt_cost++;
+ nval &= ~(1UL << bit);
+ }
+
+ if (nval == 0 && alt_cost <= cost)
+ {
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ }
}
if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
@@ -1648,7 +1648,7 @@ (define_insn "smax<mode>3"
(define_expand "and<mode>3"
[(set (match_operand:X 0 "register_operand")
(and:X (match_operand:X 1 "register_operand")
- (match_operand:X 2 "arith_operand_or_mode_mask")))]
+ (match_operand:X 2 "arith_or_mode_mask_or_zbs_operand")))]
""
{
/* If the second operand is a mode mask, emit an extension
new file mode 100644
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+ which shouldn't vary based on the optimization level. -O1 seems to work
+ and eliminates the usual sources of extraneous dead code that would throw
+ off the counts. */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+ having thousands of tests each testing one variant, we just test the
+ total number of instructions.
+
+ This isn't expected to change much and any change is worthy of a look. */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|bclri|li|ret|sh1add|sh2add|sh3add|slli)" 156 } } */
+
+
+unsigned long foo_0xfffffffbfffff7ff(void) { return 0xfffffffbfffff7ffUL; }
+
+unsigned long foo_0xfffffff7fffff7ff(void) { return 0xfffffff7fffff7ffUL; }
+
+unsigned long foo_0xffffffeffffff7ff(void) { return 0xffffffeffffff7ffUL; }
+
+unsigned long foo_0xffffffdffffff7ff(void) { return 0xffffffdffffff7ffUL; }
+
+unsigned long foo_0xffffffbffffff7ff(void) { return 0xffffffbffffff7ffUL; }
+
+unsigned long foo_0xffffff7ffffff7ff(void) { return 0xffffff7ffffff7ffUL; }
+
+unsigned long foo_0xfffffefffffff7ff(void) { return 0xfffffefffffff7ffUL; }
+
+unsigned long foo_0xfffffdfffffff7ff(void) { return 0xfffffdfffffff7ffUL; }
+
+unsigned long foo_0xfffffbfffffff7ff(void) { return 0xfffffbfffffff7ffUL; }
+
+unsigned long foo_0xfffff7fffffff7ff(void) { return 0xfffff7fffffff7ffUL; }
+
+unsigned long foo_0xffffeffffffff7ff(void) { return 0xffffeffffffff7ffUL; }
+
+unsigned long foo_0xffffdffffffff7ff(void) { return 0xffffdffffffff7ffUL; }
+
+unsigned long foo_0xffffbffffffff7ff(void) { return 0xffffbffffffff7ffUL; }
+
+unsigned long foo_0xffff7ffffffff7ff(void) { return 0xffff7ffffffff7ffUL; }
+
+unsigned long foo_0xfffefffffffff7ff(void) { return 0xfffefffffffff7ffUL; }
+
+unsigned long foo_0xfffdfffffffff7ff(void) { return 0xfffdfffffffff7ffUL; }
+
+unsigned long foo_0xfffbfffffffff7ff(void) { return 0xfffbfffffffff7ffUL; }
+
+unsigned long foo_0xfff7fffffffff7ff(void) { return 0xfff7fffffffff7ffUL; }
+
+unsigned long foo_0xffeffffffffff7ff(void) { return 0xffeffffffffff7ffUL; }
+
+unsigned long foo_0xffdffffffffff7ff(void) { return 0xffdffffffffff7ffUL; }
+
+unsigned long foo_0xffbffffffffff7ff(void) { return 0xffbffffffffff7ffUL; }
+
+unsigned long foo_0xff7ffffffffff7ff(void) { return 0xff7ffffffffff7ffUL; }
+
+unsigned long foo_0xfefffffffffff7ff(void) { return 0xfefffffffffff7ffUL; }
+
+unsigned long foo_0xfdfffffffffff7ff(void) { return 0xfdfffffffffff7ffUL; }
+
+unsigned long foo_0xfbfffffffffff7ff(void) { return 0xfbfffffffffff7ffUL; }
+
+unsigned long foo_0xf7fffffffffff7ff(void) { return 0xf7fffffffffff7ffUL; }
+
+unsigned long foo_0xeffffffffffff7ff(void) { return 0xeffffffffffff7ffUL; }
+
+unsigned long foo_0xdffffffffffff7ff(void) { return 0xdffffffffffff7ffUL; }
+
+unsigned long foo_0xbffffffffffff7ff(void) { return 0xbffffffffffff7ffUL; }
+
+unsigned long foo_0xffffffff7fffd7ff(void) { return 0xffffffff7fffd7ffUL; }
+
+unsigned long foo_0xffffffff7ffdf7ff(void) { return 0xffffffff7ffdf7ffUL; }
+
+unsigned long foo_0xffffffff7fdff7ff(void) { return 0xffffffff7fdff7ffUL; }
+
+unsigned long foo_0xffffffff7dfff7ff(void) { return 0xffffffff7dfff7ffUL; }
+
+unsigned long foo_0xffffffff5ffff7ff(void) { return 0xffffffff5ffff7ffUL; }
+
+unsigned long foo_0xfffff7ff7ffff7ff(void) { return 0xfffff7ff7ffff7ffUL; }
+
+unsigned long foo_0xffffdfff7ffff7ff(void) { return 0xffffdfff7ffff7ffUL; }
+
+unsigned long foo_0xffff7fff7ffff7ff(void) { return 0xffff7fff7ffff7ffUL; }
+
+unsigned long foo_0xfffdffff7ffff7ff(void) { return 0xfffdffff7ffff7ffUL; }
+
+
+