commit 49a1a340ea0eef681f23b6861f3cdb6840aadd99
Author: Roger Sayle <roger@nextmovesoftware.com>
Date: Tue Mar 5 11:06:17 2024 +0100
AVR: Improve output of insn "*insv.any_shift.<mode>_split".
The instructions printed by insn "*insv.any_shift.<mode>_split" were
sub-optimal.  The code to print the improved output is lengthy and is
performed by the new function avr_out_insv.  As it turns out, the function
can also handle shift offsets of zero, which is the case for "*andhi3",
"*andpsi3" and "*andsi3".  Thus, these three insns get a new 3-operand
alternative where the 3rd operand is an exact power of 2.
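For illustration, a minimal C sketch of the two source shapes this change
covers (the function names are made up, and the exact instruction
sequences depend on register allocation):

    #include <stdint.h>

    /* Single-bit extract through a shift: (a >> 4) & (1u << 0).
       Previously emitted as BST + CLR + CLR + BLD; avr_out_insv may
       now emit e.g. SWAP + ANDI + CLR when the registers allow it.  */
    uint16_t bit4_to_bit0 (uint16_t a)
    {
      return (a >> 4) & 1u;
    }

    /* Plain AND with a single-bit mask, i.e. a shift offset of zero;
       this may match the new Cb2 alternative, where the source
       register may differ from the destination.  */
    uint16_t keep_bit10 (uint16_t a)
    {
      return a & (1u << 10);
    }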
gcc/
* config/avr/avr-protos.h (avr_out_insv): New proto.
* config/avr/avr.cc (avr_out_insv): New function.
(avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case.
(avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs.
* config/avr/avr.md (define_attr "adjust_len"): Add insv.
(andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3):
Add constraint alternative where the 3rd operand is a power
of 2, and the source register may differ from the destination.
(*insv.any_shift.<mode>_split): Call avr_out_insv to output
instructions. Set attr "length" to "insv".
* config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints.
gcc/testsuite/
* gcc.target/avr/torture/insv-anyshift-hi.c: New test.
* gcc.target/avr/torture/insv-anyshift-si.c: New test.
@@ -58,6 +58,7 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse);
extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
+extern const char *avr_out_insv (rtx_insn *, rtx*, int*);
extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
extern const char *avr_out_plus_set_ZN (rtx*, int*);
@@ -9795,6 +9795,178 @@ avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen)
}
+/* Output instructions for XOP[0] = (XOP[1] <Shift> XOP[2]) & XOP[3] where
+ - XOP[0] and XOP[1] have the same mode which is one of: QI, HI, PSI, SI.
+ - XOP[2] is a const_int shift offset.
+ - XOP[3] is an exact const_int power of 2.
+ - <Shift> is any of: ASHIFT, LSHIFTRT, ASHIFTRT.
+ - The result depends on XOP[1].
+ or XOP[0] = XOP[1] & XOP[2] where
+ - XOP[0] and XOP[1] have the same mode which is one of: HI, PSI, SI.
+ - XOP[2] is an exact const_int power of 2.
+ Returns "".
+ PLEN != 0: Set *PLEN to the code length in words. Don't output anything.
+ PLEN == 0: Output instructions. */
+
+const char*
+avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
+{
+ machine_mode mode = GET_MODE (xop[0]);
+ int n_bytes = GET_MODE_SIZE (mode);
+ rtx xsrc = SET_SRC (single_set (insn));
+
+ gcc_assert (AND == GET_CODE (xsrc));
+
+ rtx xop2 = xop[2];
+ rtx xop3 = xop[3];
+
+ if (REG_P (XEXP (xsrc, 0)))
+ {
+ // This function can also handle AND with an exact power of 2,
+ // which can be regarded as a shift of XOP[1] with an offset of 0.
+ rtx xshift = gen_rtx_ASHIFT (mode, xop[1], const0_rtx);
+ xsrc = gen_rtx_AND (mode, xshift, xop[2]);
+ xop3 = xop[2];
+ xop2 = const0_rtx;
+ }
+
+ // Any of ASHIFT, LSHIFTRT, ASHIFTRT.
+ enum rtx_code code = GET_CODE (XEXP (xsrc, 0));
+ int shift = code == ASHIFT ? INTVAL (xop2) : -INTVAL (xop2);
+
+ // Determines the position of the output bit.
+ unsigned mask = GET_MODE_MASK (mode) & INTVAL (xop3);
+
+ // Position of the output / input bit, respectively.
+ int obit = exact_log2 (mask);
+ int ibit = obit - shift;
+
+ gcc_assert (IN_RANGE (obit, 0, GET_MODE_BITSIZE (mode) - 1));
+ gcc_assert (IN_RANGE (ibit, 0, GET_MODE_BITSIZE (mode) - 1));
+
+ // In the remainder, use the sub-bytes that hold the bits.
+ rtx op[4] =
+ {
+ // Output
+ simplify_gen_subreg (QImode, xop[0], mode, obit / 8),
+ GEN_INT (obit & 7),
+ // Input
+ simplify_gen_subreg (QImode, xop[1], mode, ibit / 8),
+ GEN_INT (ibit & 7)
+ };
+ obit &= 7;
+ ibit &= 7;
+
+ // The length of the default sequence at the end of this function.
+ // We only emit anything other than the default when we find a sequence
+ // that is strictly shorter than the default sequence, which is:
+ // BST + <CLR-result-bytes> + BLD.
+ const int len0 = 2 + n_bytes - (n_bytes == 4 && AVR_HAVE_MOVW);
+
+ // Finding something shorter than the default sequence implies that there
+ // must be at most 2 instructions that deal with the bytes containing the
+ // relevant bits. In addition, we need N_BYTES - 1 instructions to clear
+ // the remaining result bytes.
+
+ const int n_clr = n_bytes - 1;
+ bool clr_p = false;
+ bool andi_p = false;
+
+ if (plen)
+ *plen = 0;
+
+ if (REGNO (op[0]) == REGNO (op[2])
+ // Output reg allows ANDI.
+ && test_hard_reg_class (LD_REGS, op[0]))
+ {
+ if (1 + n_clr < len0
+ // Same byte and bit: A single ANDI will do.
+ && obit == ibit)
+ {
+ clr_p = andi_p = true;
+ }
+ else if (2 + n_clr < len0
+ // |obit - ibit| = 4: SWAP + ANDI will do.
+ && (obit == ibit + 4 || obit == ibit - 4))
+ {
+ avr_asm_len ("swap %0", op, plen, 1);
+ clr_p = andi_p = true;
+ }
+ else if (2 + n_clr < len0
+ // LSL + ANDI will do.
+ && obit == ibit + 1)
+ {
+ avr_asm_len ("lsl %0", op, plen, 1);
+ clr_p = andi_p = true;
+ }
+ else if (2 + n_clr < len0
+ // LSR + ANDI will do.
+ && obit == ibit - 1)
+ {
+ avr_asm_len ("lsr %0", op, plen, 1);
+ clr_p = andi_p = true;
+ }
+ }
+
+ if (REGNO (op[0]) != REGNO (op[2])
+ && obit == ibit)
+ {
+ if (2 + n_clr < len0
+ // Same bit but different byte: MOV + ANDI will do.
+ && test_hard_reg_class (LD_REGS, op[0]))
+ {
+ avr_asm_len ("mov %0,%2", op, plen, 1);
+ clr_p = andi_p = true;
+ }
+ else if (2 + n_clr < len0
+ // Same bit but different byte: We can use ANDI + MOV,
+ // but only if the input byte is LD_REGS and unused after.
+ && test_hard_reg_class (LD_REGS, op[2])
+ && reg_unused_after (insn, op[2]))
+ {
+ avr_asm_len ("andi %2,1<<%3" CR_TAB
+ "mov %0,%2", op, plen, 2);
+ clr_p = true;
+ }
+ }
+
+ // Output remaining instructions of the shorter sequence.
+
+ if (andi_p)
+ avr_asm_len ("andi %0,1<<%1", op, plen, 1);
+
+ if (clr_p)
+ {
+ for (int b = 0; b < n_bytes; ++b)
+ {
+ rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+ if (REGNO (byte) != REGNO (op[0]))
+ avr_asm_len ("clr %0", &byte, plen, 1);
+ }
+
+ // CLR_P means we found a shorter sequence, so we are done now.
+ return "";
+ }
+
+ // No shorter sequence found, just emit BST, CLR*, BLD sequence.
+
+ avr_asm_len ("bst %2,%3", op, plen, -1);
+
+ if (n_bytes == 4 && AVR_HAVE_MOVW)
+ avr_asm_len ("clr %A0" CR_TAB
+ "clr %B0" CR_TAB
+ "movw %C0,%A0", xop, plen, 3);
+ else
+ for (int b = 0; b < n_bytes; ++b)
+ {
+ rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+ avr_asm_len ("clr %0", &byte, plen, 1);
+ }
+
+ return avr_asm_len ("bld %0,%1", op, plen, 1);
+}
+
+
/* Output instructions to extract a bit to 8-bit register XOP[0].
The input XOP[1] is a register or an 8-bit MEM in the lower I/O range.
XOP[2] is the const_int bit position. Return "".
@@ -10721,6 +10893,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break;
case ADJUST_LEN_EXTR_NOT: avr_out_extr_not (insn, op, &len); break;
case ADJUST_LEN_EXTR: avr_out_extr (insn, op, &len); break;
+ case ADJUST_LEN_INSV: avr_out_insv (insn, op, &len); break;
case ADJUST_LEN_PLUS: avr_out_plus (insn, op, &len); break;
case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break;
@@ -12206,6 +12379,14 @@ avr_cbranch_cost (rtx x)
return COSTS_N_INSNS (size + 1 + 1);
}
+ if (GET_CODE (xreg) == ZERO_EXTRACT
+ && XEXP (xreg, 1) == const1_rtx)
+ {
+ // Branch on a single bit, with an additional edge due to less
+ // register pressure.
+ return (int) COSTS_N_INSNS (1.5);
+ }
+
bool reg_p = register_operand (xreg, mode);
bool reg_or_0_p = reg_or_0_operand (xval, mode);
@@ -170,7 +170,7 @@ (define_attr "adjust_len"
ashlhi, ashrhi, lshrhi,
ashlsi, ashrsi, lshrsi,
ashlpsi, ashrpsi, lshrpsi,
- insert_bits, insv_notbit,
+ insert_bits, insv_notbit, insv,
add_set_ZN, cmp_uext, cmp_sext,
no"
(const_string "no"))
@@ -4380,10 +4380,10 @@ (define_insn "*andqi3"
[(set_attr "length" "1,1,2")])
(define_insn_and_split "andhi3"
- [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r")
- (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,0")
- (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,n")))
- (clobber (match_scratch:QI 3 "=X,X,X,X ,&d"))]
+ [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r ,r")
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,r ,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,Cb2,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,X ,X ,&d"))]
""
"#"
"&& reload_completed"
@@ -4394,10 +4394,10 @@ (define_insn_and_split "andhi3"
(clobber (reg:CC REG_CC))])])
(define_insn "*andhi3"
- [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r")
- (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,0")
- (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,n")))
- (clobber (match_scratch:QI 3 "=X,X,X,X ,&d"))
+ [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r ,r")
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,r ,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,Cb2,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,X ,X ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
@@ -4405,17 +4405,19 @@ (define_insn "*andhi3"
return "and %A0,%A2\;and %B0,%B2";
else if (which_alternative == 1)
return "andi %A0,lo8(%2)\;andi %B0,hi8(%2)";
+ else if (which_alternative == 4)
+ return avr_out_insv (insn, operands, NULL);
return avr_out_bitop (insn, operands, NULL);
}
- [(set_attr "length" "2,2,2,4,4")
- (set_attr "adjust_len" "*,*,out_bitop,out_bitop,out_bitop")])
+ [(set_attr "length" "2,2,2,4,4,4")
+ (set_attr "adjust_len" "*,*,out_bitop,out_bitop,insv,out_bitop")])
(define_insn_and_split "andpsi3"
- [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r")
- (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,0")
- (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))]
+ [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r ,r")
+ (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,r ,0")
+ (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,Cb3,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X ,X ,&d"))]
""
"#"
"&& reload_completed"
@@ -4426,10 +4428,10 @@ (define_insn_and_split "andpsi3"
(clobber (reg:CC REG_CC))])])
(define_insn "*andpsi3"
- [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r")
- (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,0")
- (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))
+ [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r ,r")
+ (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,r ,0")
+ (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,Cb3,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X ,X ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
@@ -4438,16 +4440,19 @@ (define_insn "*andpsi3"
"and %B0,%B2" CR_TAB
"and %C0,%C2";
+ if (which_alternative == 3)
+ return avr_out_insv (insn, operands, NULL);
+
return avr_out_bitop (insn, operands, NULL);
}
- [(set_attr "length" "3,3,6,6")
- (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop")])
+ [(set_attr "length" "3,3,6,5,6")
+ (set_attr "adjust_len" "*,out_bitop,out_bitop,insv,out_bitop")])
(define_insn_and_split "andsi3"
- [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r")
- (and:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,0")
- (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))]
+ [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r ,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,r ,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,Cb4,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X ,X ,&d"))]
""
"#"
"&& reload_completed"
@@ -4458,10 +4463,10 @@ (define_insn_and_split "andsi3"
(clobber (reg:CC REG_CC))])])
(define_insn "*andsi3"
- [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r")
- (and:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,0")
- (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))
+ [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r ,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,r ,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,Cb4,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X ,X ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
@@ -4471,10 +4476,13 @@ (define_insn "*andsi3"
"and %C0,%C2" CR_TAB
"and %D0,%D2";
+ if (which_alternative == 3)
+ return avr_out_insv (insn, operands, NULL);
+
return avr_out_bitop (insn, operands, NULL);
}
- [(set_attr "length" "4,4,8,8")
- (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop")])
+ [(set_attr "length" "4,4,8,6,8")
+ (set_attr "adjust_len" "*,out_bitop,out_bitop,insv,out_bitop")])
(define_peephole2 ; andi
[(parallel [(set (match_operand:QI 0 "d_register_operand" "")
@@ -9852,6 +9860,12 @@ (define_insn_and_split "*extzv.io.lsr7"
(const_int 1)
(const_int 7)))])
+;; This insn serves as a combine bridge because insn combine will only
+;; combine at most 3 insns.  It's not actually an open-coded
+;; bit-insertion but just a part of one.  It may occur in contexts other
+;; than INSV though, and in such a case the code may be worse than without
+;; this pattern.  We still have to emit code for it in that case because
+;; we cannot roll back.
(define_insn_and_split "*insv.any_shift.<mode>_split"
[(set (match_operand:QISI 0 "register_operand" "=r")
(and:QISI (any_shift:QISI (match_operand:QISI 1 "register_operand" "r")
@@ -9874,27 +9888,9 @@ (define_insn "*insv.any_shift.<mode>"
(clobber (reg:CC REG_CC))]
"reload_completed"
{
- int shift = <CODE> == ASHIFT ? INTVAL (operands[2]) : -INTVAL (operands[2]);
- int mask = GET_MODE_MASK (<MODE>mode) & INTVAL (operands[3]);
- // Position of the output / input bit, respectively.
- int obit = exact_log2 (mask);
- int ibit = obit - shift;
- gcc_assert (IN_RANGE (obit, 0, <MSB>));
- gcc_assert (IN_RANGE (ibit, 0, <MSB>));
- operands[3] = GEN_INT (obit);
- operands[2] = GEN_INT (ibit);
-
- if (<SIZE> == 1) return "bst %T1%T2\;clr %0\;" "bld %T0%T3";
- if (<SIZE> == 2) return "bst %T1%T2\;clr %A0\;clr %B0\;" "bld %T0%T3";
- if (<SIZE> == 3) return "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;bld %T0%T3";
- return AVR_HAVE_MOVW
- ? "bst %T1%T2\;clr %A0\;clr %B0\;movw %C0,%A0\;" "bld %T0%T3"
- : "bst %T1%T2\;clr %A0\;clr %B0\;clr %C0\;clr %D0\;bld %T0%T3";
+ return avr_out_insv (insn, operands, nullptr);
}
- [(set (attr "length")
- (minus (symbol_ref "2 + <SIZE>")
- ; One less if we can use a MOVW to clear.
- (symbol_ref "<SIZE> == 4 && AVR_HAVE_MOVW")))])
+ [(set_attr "adjust_len" "insv")])
(define_insn_and_split "*extzv.<mode>hi2"
@@ -188,6 +188,21 @@ (define_constraint "Co4"
(and (match_code "const_int")
(match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<1) | (1<<8))")))
+(define_constraint "Cb2"
+ "Constant 2-byte integer that has exactly 1 bit set."
+ (and (match_code "const_int")
+ (match_test "single_one_operand (op, HImode)")))
+
+(define_constraint "Cb3"
+ "Constant 3-byte integer that has exactly 1 bit set."
+ (and (match_code "const_int")
+ (match_test "single_one_operand (op, PSImode)")))
+
+(define_constraint "Cb4"
+ "Constant 4-byte integer that has exactly 1 bit set."
+ (and (match_code "const_int")
+ (match_test "single_one_operand (op, SImode)")))
+
(define_constraint "Cx2"
"Constant 2-byte integer that allows XOR without clobber register."
(and (match_code "const_int")
new file mode 100644
@@ -0,0 +1,141 @@
+/* { dg-do run } */
+/* { dg-additional-options { -fno-split-wide-types } } */
+
+typedef __UINT16_TYPE__ uint16_t;
+
+/* Testing inlined and completely folded versions of functions
+ against their non-inlined, non-folded counterparts. */
+
+#define MK_FUN1(OBIT, LSR) \
+ static __inline__ __attribute__((__always_inline__)) \
+ uint16_t fun1_lsr_##OBIT##_##LSR##_ai (int x, uint16_t a) \
+ { \
+ (void) x; \
+ return (a >> LSR) & (1u << OBIT); \
+ } \
+ \
+ __attribute__((__noinline__,__noclone__)) \
+ uint16_t fun1_lsr_##OBIT##_##LSR##_ni (int x, uint16_t a) \
+ { \
+ return fun1_lsr_##OBIT##_##LSR##_ai (x, a); \
+ } \
+ \
+ void test_fun1_lsr_##OBIT##_##LSR (void) \
+ { \
+ if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1u << (OBIT + LSR)) \
+ != fun1_lsr_##OBIT##_##LSR##_ai (0, 1u << (OBIT + LSR))) \
+ __builtin_abort(); \
+ \
+ if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1u << (OBIT + LSR)) \
+ != fun1_lsr_##OBIT##_##LSR##_ai (0, -1u)) \
+ __builtin_abort(); \
+ }
+
+#define MK_FUN3(OBIT, LSR) \
+ static __inline__ __attribute__((__always_inline__)) \
+ uint16_t fun3_lsr_##OBIT##_##LSR##_ai (uint16_t a) \
+ { \
+ return (a >> LSR) & (1u << OBIT); \
+ } \
+ \
+ __attribute__((__noinline__,__noclone__)) \
+ uint16_t fun3_lsr_##OBIT##_##LSR##_ni (uint16_t a) \
+ { \
+ return fun3_lsr_##OBIT##_##LSR##_ai (a); \
+ } \
+ \
+ void test_fun3_lsr_##OBIT##_##LSR (void) \
+ { \
+ if (fun3_lsr_##OBIT##_##LSR##_ni (1u << (OBIT + LSR)) \
+ != fun3_lsr_##OBIT##_##LSR##_ai (1u << (OBIT + LSR))) \
+ __builtin_abort(); \
+ \
+ if (fun3_lsr_##OBIT##_##LSR##_ni (1u << (OBIT + LSR)) \
+ != fun3_lsr_##OBIT##_##LSR##_ai (-1u)) \
+ __builtin_abort(); \
+ }
+
+
+#define MK_FUN2(OBIT, LSL) \
+ static __inline__ __attribute__((__always_inline__)) \
+ uint16_t fun2_lsl_##OBIT##_##LSL##_ai (uint16_t a) \
+ { \
+ return (a << LSL) & (1u << OBIT); \
+ } \
+ \
+ __attribute__((__noinline__,__noclone__)) \
+ uint16_t fun2_lsl_##OBIT##_##LSL##_ni (uint16_t a) \
+ { \
+ return fun2_lsl_##OBIT##_##LSL##_ai (a); \
+ } \
+ \
+ void test_fun2_lsl_##OBIT##_##LSL (void) \
+ { \
+ if (fun2_lsl_##OBIT##_##LSL##_ni (1u << (OBIT - LSL)) \
+ != fun2_lsl_##OBIT##_##LSL##_ai (1u << (OBIT - LSL))) \
+ __builtin_abort(); \
+ \
+ if (fun2_lsl_##OBIT##_##LSL##_ni (1u << (OBIT - LSL)) \
+ != fun2_lsl_##OBIT##_##LSL##_ai (-1u)) \
+ __builtin_abort(); \
+ }
+
+
+MK_FUN1 (10, 4)
+MK_FUN1 (6, 1)
+MK_FUN1 (1, 5)
+MK_FUN1 (0, 8)
+MK_FUN1 (0, 4)
+MK_FUN1 (0, 1)
+MK_FUN1 (0, 0)
+
+MK_FUN3 (10, 4)
+MK_FUN3 (6, 1)
+MK_FUN3 (1, 5)
+MK_FUN3 (0, 8)
+MK_FUN3 (0, 4)
+MK_FUN3 (0, 1)
+MK_FUN3 (0, 0)
+
+MK_FUN2 (12, 8)
+MK_FUN2 (15, 15)
+MK_FUN2 (14, 12)
+MK_FUN2 (8, 8)
+MK_FUN2 (7, 4)
+MK_FUN2 (5, 4)
+MK_FUN2 (5, 1)
+MK_FUN2 (4, 0)
+MK_FUN2 (1, 0)
+MK_FUN2 (0, 0)
+
+int main (void)
+{
+ test_fun1_lsr_10_4 ();
+ test_fun1_lsr_6_1 ();
+ test_fun1_lsr_1_5 ();
+ test_fun1_lsr_0_8 ();
+ test_fun1_lsr_0_4 ();
+ test_fun1_lsr_0_1 ();
+ test_fun1_lsr_0_0 ();
+
+ test_fun3_lsr_10_4 ();
+ test_fun3_lsr_6_1 ();
+ test_fun3_lsr_1_5 ();
+ test_fun3_lsr_0_8 ();
+ test_fun3_lsr_0_4 ();
+ test_fun3_lsr_0_1 ();
+ test_fun3_lsr_0_0 ();
+
+ test_fun2_lsl_12_8 ();
+ test_fun2_lsl_15_15 ();
+ test_fun2_lsl_14_12 ();
+ test_fun2_lsl_8_8 ();
+ test_fun2_lsl_7_4 ();
+ test_fun2_lsl_5_4 ();
+ test_fun2_lsl_5_1 ();
+ test_fun2_lsl_4_0 ();
+ test_fun2_lsl_1_0 ();
+ test_fun2_lsl_0_0 ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,89 @@
+/* { dg-do run } */
+
+typedef __UINT32_TYPE__ uint32_t;
+
+/* Testing inlined and completely folded versions of functions
+ against their non-inlined, non-folded counterparts. */
+
+#define MK_FUN1(OBIT, LSR) \
+ static __inline__ __attribute__((__always_inline__)) \
+ uint32_t fun1_lsr_##OBIT##_##LSR##_ai (int x, uint32_t a) \
+ { \
+ (void) x; \
+ return (a >> LSR) & (1ul << OBIT); \
+ } \
+ \
+ __attribute__((__noinline__,__noclone__)) \
+ uint32_t fun1_lsr_##OBIT##_##LSR##_ni (int x, uint32_t a) \
+ { \
+ return fun1_lsr_##OBIT##_##LSR##_ai (x, a); \
+ } \
+ \
+ void test_fun1_lsr_##OBIT##_##LSR (void) \
+ { \
+ if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR)) \
+ != fun1_lsr_##OBIT##_##LSR##_ai (0, 1ul << (OBIT + LSR))) \
+ __builtin_abort(); \
+ \
+ if (fun1_lsr_##OBIT##_##LSR##_ni (0, 1ul << (OBIT + LSR)) \
+ != fun1_lsr_##OBIT##_##LSR##_ai (0, -1ul)) \
+ __builtin_abort(); \
+ }
+
+
+#define MK_FUN2(OBIT, LSL) \
+ static __inline__ __attribute__((__always_inline__)) \
+ uint32_t fun2_lsl_##OBIT##_##LSL##_ai (int x, uint32_t a) \
+ { \
+ (void) x; \
+ return (a << LSL) & (1ul << OBIT); \
+ } \
+ \
+ __attribute__((__noinline__,__noclone__)) \
+ uint32_t fun2_lsl_##OBIT##_##LSL##_ni (int x, uint32_t a) \
+ { \
+ return fun2_lsl_##OBIT##_##LSL##_ai (x, a); \
+ } \
+ \
+ void test_fun2_lsl_##OBIT##_##LSL (void) \
+ { \
+ if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL)) \
+ != fun2_lsl_##OBIT##_##LSL##_ai (0, 1ul << (OBIT - LSL))) \
+ __builtin_abort(); \
+ \
+ if (fun2_lsl_##OBIT##_##LSL##_ni (0, 1ul << (OBIT - LSL)) \
+ != fun2_lsl_##OBIT##_##LSL##_ai (0, -1ul)) \
+ __builtin_abort(); \
+ }
+
+
+MK_FUN1 (13, 15)
+MK_FUN1 (13, 16)
+MK_FUN1 (13, 17)
+MK_FUN1 (13, 12)
+MK_FUN1 (0, 31)
+MK_FUN1 (0, 8)
+MK_FUN1 (0, 0)
+
+MK_FUN2 (12, 8)
+MK_FUN2 (13, 8)
+MK_FUN2 (16, 8)
+MK_FUN2 (16, 0)
+
+int main (void)
+{
+ test_fun1_lsr_13_15 ();
+ test_fun1_lsr_13_16 ();
+ test_fun1_lsr_13_17 ();
+ test_fun1_lsr_13_12 ();
+ test_fun1_lsr_0_31 ();
+ test_fun1_lsr_0_8 ();
+ test_fun1_lsr_0_0 ();
+
+ test_fun2_lsl_12_8 ();
+ test_fun2_lsl_13_8 ();
+ test_fun2_lsl_16_8 ();
+ test_fun2_lsl_16_0 ();
+
+ return 0;
+}