@@ -56,7 +56,7 @@ enum loongarch_symbol_type {
};
#define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1)
-/* Routines implemented in loongarch.c. */
+/* Routines implemented in loongarch.cc. */
extern rtx loongarch_emit_move (rtx, rtx);
extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
extern void loongarch_expand_prologue (void);
@@ -163,6 +163,8 @@ extern const char *current_section_name (void);
extern unsigned int current_section_flags (void);
extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern bool loongarch_check_zero_div_p (void);
+extern bool loongarch_pre_reload_split (void);
+extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
union loongarch_gen_fn_ptrs
{
@@ -5482,6 +5482,42 @@ loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
return true;
}
+/* Condition shared by pre-reload splitters that also have an associated
+   instruction pattern.  Such a pattern may be matched at any time
+   before the split1 pass (usually by combine); split1 then splits it
+   unconditionally, and it should not be matched again afterwards.  */
+
+bool
+loongarch_pre_reload_split (void)
+{
+  if (!can_create_pseudo_p ())
+    return false;
+
+  /* Reject once PROP_rtl_split_insns has been recorded for the current
+     function.  */
+  return (cfun->curr_properties & PROP_rtl_split_insns) == 0;
+}
+
+/* Decide whether bstrins.<d> can be used for
+     op0 = (op1 & op2) | (op3 & op4)
+   where op0, op1 and op3 are registers and op2 and op4 are integer
+   constants.  OP is the operand array; only OP[2] and OP[4] are read.
+
+   Return 0 if the masks do not qualify.  Return 1 if op4, with its
+   trailing zero bits shifted out, is accepted by low_bitmask_len.
+   Return -1 if op2 is accepted instead, either unshifted or after the
+   same shift; the *bstrins_<mode>_for_ior_mask splitter swaps op1 with
+   op3 and op2 with op4 when it sees a negative result.  */
+
+int
+loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
+{
+  const unsigned HOST_WIDE_INT m_op2 = UINTVAL (op[2]);
+  const unsigned HOST_WIDE_INT m_op4 = UINTVAL (op[4]);
+
+  /* Both masks must be non-zero and exact complements of each other.  */
+  if (m_op2 == 0 || m_op4 == 0 || m_op2 != ~m_op4)
+    return 0;
+
+  /* Try to avoid a right-shift: if op2 is usable without shifting, pick
+     it even though op4 might pass the shifted test below.  */
+  if (low_bitmask_len (mode, m_op2) != -1)
+    return -1;
+
+  /* Otherwise drop each mask's trailing zero bits and test what is
+     left, trying op4 before op2.  */
+  const unsigned HOST_WIDE_INT op4_field = m_op4 >> (ffs_hwi (m_op4) - 1);
+  if (low_bitmask_len (mode, op4_field) != -1)
+    return 1;
+
+  const unsigned HOST_WIDE_INT op2_field = m_op2 >> (ffs_hwi (m_op2) - 1);
+  return low_bitmask_len (mode, op2_field) != -1 ? -1 : 0;
+}
+
/* Print the text for PRINT_OPERAND punctation character CH to FILE.
The punctuation characters are:
@@ -1322,6 +1322,97 @@ (define_insn "and<mode>3_extended"
[(set_attr "move_type" "pick_ins")
(set_attr "mode" "<MODE>")])
+;; AND of a register with a constant accepted by ins_zero_bitmask_operand.
+;; The insn is split into a copy of operand 1 into operand 0 followed by
+;; a store of zero into a zero_extract of operand 0.  The preparation
+;; code inverts the AND mask, takes LO as the index of the lowest set
+;; bit of the inverted mask, obtains LEN from low_bitmask_len applied to
+;; the inverted mask shifted right by LO, and limits LEN so that
+;; LO + LEN does not exceed the bit size of the mode.  Operand 2 is then
+;; replaced by LEN and operand 3 is set to LO.
+(define_insn_and_split "*bstrins_<mode>_for_mask"
+ [(set (match_operand:GPR 0 "register_operand")
+ (and:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "ins_zero_bitmask_operand")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 1))
+ (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 3))
+ (const_int 0))]
+ {
+ unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]);
+ int lo = ffs_hwi (mask) - 1;
+ int len = low_bitmask_len (<MODE>mode, mask >> lo);
+
+ len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo);
+ operands[2] = GEN_INT (len);
+ operands[3] = GEN_INT (lo);
+ })
+
+;; Implement
+;;   op0 = (op1 & op2) | (op3 & op4)
+;; as a register copy followed by a bit-field insert when
+;; loongarch_use_bstrins_for_ior_with_mask accepts the two constant
+;; masks.  The pattern is only matched while
+;; loongarch_pre_reload_split () holds, so the preparation code may
+;; create new pseudo registers.
+(define_insn_and_split "*bstrins_<mode>_for_ior_mask"
+  [(set (match_operand:GPR 0 "register_operand")
+        (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand")
+                          (match_operand:GPR 2 "const_int_operand"))
+                 (and:GPR (match_operand:GPR 3 "register_operand")
+                          (match_operand:GPR 4 "const_int_operand"))))]
+  "loongarch_pre_reload_split () && \
+   loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 1))
+   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4))
+        (match_dup 3))]
+  {
+    /* A negative result asks for the two (register, mask) pairs to be
+       exchanged, so that operands[3] is always the register the field
+       is taken from and ~operands[2] the mask of that field.  */
+    if (loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands) < 0)
+      {
+        std::swap (operands[1], operands[3]);
+        std::swap (operands[2], operands[4]);
+      }
+
+    unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]);
+    int lo = ffs_hwi (mask) - 1;
+    int len = low_bitmask_len (<MODE>mode, mask >> lo);
+
+    /* Do not let the field extend past the top of the mode.  */
+    len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo);
+    operands[2] = GEN_INT (len);
+    operands[4] = GEN_INT (lo);
+
+    if (lo)
+      {
+        /* The inserted value is taken from the low bits of its source,
+           so bring the field down to bit 0 first.  The shift is emitted
+           ahead of the split sequence and therefore reads operands[3]
+           before operands[0] is written.  */
+        rtx tmp = gen_reg_rtx (<MODE>mode);
+        emit_move_insn (tmp, gen_rtx_ASHIFTRT (<MODE>mode, operands[3],
+                                               GEN_INT (lo)));
+        operands[3] = tmp;
+      }
+    else if (reg_overlap_mentioned_p (operands[0], operands[3]))
+      {
+        /* The split sequence first copies operands[1] into operands[0]
+           and only then reads operands[3].  If the two overlap, the
+           value to insert would already have been overwritten, so save
+           it in a fresh pseudo beforehand.  */
+        rtx tmp = gen_reg_rtx (<MODE>mode);
+        emit_move_insn (tmp, operands[3]);
+        operands[3] = tmp;
+      }
+  })
+
+;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask
+;; if possible, but the result may be sub-optimal when one of the masks
+;; is (1 << N) - 1 and one of the source registers is also the
+;; destination register.  For example:
+;; move t0, a0
+;; move a0, a1
+;; bstrins.d a0, t0, 42, 0
+;; ret
+;; Using a shift operation would be better:
+;; srai.d t0, a1, 43
+;; bstrins.d a0, t0, 63, 43
+;; ret
+;; Unfortunately we cannot figure this out in split1: before reload we
+;; cannot know whether the destination register is one of the source
+;; registers.  Fix it up in peephole2.
+;;
+;; In the pattern below operand 0 is the temporary, operand 1 the
+;; destination, operand 2 the other source and operand 3 the width of
+;; the field inserted at bit 0.  The replacement shifts operand 2 right
+;; by operand 3 into the temporary and inserts the remaining
+;; (mode size - operand 3) bits into operand 1 at position operand 3.
+(define_peephole2
+ [(set (match_operand:GPR 0 "register_operand")
+ (match_operand:GPR 1 "register_operand"))
+ (set (match_dup 1) (match_operand:GPR 2 "register_operand"))
+ (set (zero_extract:GPR (match_dup 1)
+ (match_operand:SI 3 "const_int_operand")
+ (const_int 0))
+ (match_dup 0))]
+ "peep2_reg_dead_p (3, operands[0])"
+ [(const_int 0)]
+ {
+ int len = GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[3]);
+
+ emit_insn (gen_ashr<mode>3 (operands[0], operands[2], operands[3]));
+ emit_insn (gen_insv<mode> (operands[1], GEN_INT (len), operands[3],
+ operands[0]));
+ DONE;
+ })
+
(define_insn "*iorhi3"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(ior:HI (match_operand:HI 1 "register_operand" "%r,r")
@@ -408,6 +408,14 @@ (define_predicate "fcc_reload_operand"
(define_predicate "muldiv_target_operand"
(match_operand 0 "register_operand"))
+;; Match a CONST_INT that is not -1, has bit 0 set, and for which
+;; low_bitmask_len returns more than 12 when given the inverted value
+;; with every bit below its lowest set bit filled in
+;; (~op | (~op - 1)).
+(define_predicate "ins_zero_bitmask_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) != -1")
+ (match_test "INTVAL (op) & 1")
+ (match_test "low_bitmask_len (mode, \
+ ~UINTVAL (op) | (~UINTVAL(op) - 1)) \
+ > 12")))
+
(define_predicate "const_call_insn_operand"
(match_code "const,symbol_ref,label_ref")
{
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++14 -O2 -march=loongarch64 -mabi=lp64d" } */
+/* Each pattern is confined to a single line and anchored on the comma
+   before the msb operand: a bare ".*" also matches newlines, and an
+   unanchored "7,4" would be satisfied by "47,4".  */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\]*,7,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\]*,15,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\]*,31,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\]*,47,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d\[^\n\]*,3,0" } } */
+
+typedef unsigned long u64;
+
+/* Merge A and B bit by bit: take the bits selected by MASK from A and
+   all remaining bits from B.  */
+template <u64 mask>
+u64
+test (u64 a, u64 b)
+{
+  return (a & mask) | (b & ~mask);
+}
+
+/* One explicit instantiation per scan-assembler directive above, in the
+   same order.  The digit separators are why -std=c++14 is requested.  */
+template u64 test<0x0000'0000'0000'00f0l> (u64, u64);
+template u64 test<0x0000'0000'0000'fff0l> (u64, u64);
+template u64 test<0x0000'0000'ffff'fff0l> (u64, u64);
+template u64 test<0x0000'ffff'ffff'fff0l> (u64, u64);
+template u64 test<0xffff'ffff'ffff'fff0l> (u64, u64);
new file mode 100644
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Run-time check of (a & mask) | (b & ~mask) for a family of
+   compile-time constant masks.  The value computed with the constant
+   mask is compared against the same expression evaluated with the mask
+   passed as an ordinary function argument.  */
+
+typedef unsigned long gr;
+
+/* mask<l, r>::value has bits l .. r-1 set.  */
+template <int l, int r>
+struct mask {
+ enum { value = (1ul << r) - (1ul << l) };
+};
+
+/* Specialization for r equal to the bit width of gr, where the mask
+   covering bit l and everything above it is written as -(1ul << l)
+   instead of shifting by the full width.  */
+template <int l>
+struct mask<l, sizeof (gr) * __CHAR_BIT__> {
+ enum { value = -(1ul << l) };
+};
+
+/* Abort unless OUT equals (a & mask) | (b & ~mask).  Marked noipa,
+   presumably so that MASK is not propagated into this function as a
+   constant.  */
+__attribute__ ((noipa)) void
+test (gr a, gr b, gr mask, gr out)
+{
+ if (((a & mask) | (b & ~mask)) != out)
+ __builtin_abort ();
+}
+
+/* Return X unchanged; noipa, presumably to hide the value from the
+   optimizers.  */
+__attribute__ ((noipa)) gr
+no_optimize (gr x)
+{
+ return x;
+}
+
+/* test1<l, r>::run checks the mask for bits l .. r-1, its complement,
+   and the AND-only form with the complement, then continues with
+   r + 1.  */
+template <int l, int r>
+struct test1 {
+ static void
+ run (void)
+ {
+ gr m = mask<l, r>::value;
+ gr a = no_optimize (-1ul);
+ gr b = no_optimize (0);
+
+ test (a, b, m, (a & m) | (b & ~m));
+ test (a, b, ~m, (a & ~m) | (b & m));
+ test (a, 0, ~m, a & ~m);
+
+ test1<l, r + 1>::run ();
+ }
+};
+
+/* End of the recursion over r: one past the bit width of gr.  */
+template <int l>
+struct test1<l, sizeof (gr) * __CHAR_BIT__ + 1> {
+ static void run (void) {}
+};
+
+/* test2<l> runs test1 for every r starting at l + 1, then continues
+   with l + 1.  */
+template <int l>
+void
+test2 (void)
+{
+ test1<l, l + 1>::run ();
+ test2<l + 1> ();
+}
+
+/* End of the recursion over l: the bit width of gr.  */
+template <> void test2<sizeof (gr) * __CHAR_BIT__> (void) {}
+
+int
+main ()
+{
+ test2<0> ();
+}