===================================================================
@@ -155,10 +155,34 @@ (define_predicate "call_insn_operand"
(ior (match_test "register_operand (XEXP (op, 0), mode)")
(match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))"))))
+;; For some insns we must ensure that no hard register is inserted
+;; into their operands because the insns are split and the split
+;; involves hard registers.  An example is the divmod insns, which are
+;; split into insns that represent implicit library calls.
+
;; True for register that is pseudo register.
(define_predicate "pseudo_register_operand"
- (and (match_code "reg")
- (match_test "!HARD_REGISTER_P (op)")))
+ (and (match_operand 0 "register_operand")
+ (not (and (match_code "reg")
+ (match_test "HARD_REGISTER_P (op)")))))
+
+;; True for operand that is pseudo register or CONST_INT.
+(define_predicate "pseudo_register_or_const_int_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (match_operand 0 "pseudo_register_operand")))
+
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends. A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't combine to a widening
+;; multiplication. There is no need for combine to propagate or insert
+;; hard registers, register allocation can do it just as well.
+
+;; True for operand that is pseudo register at combine time.
+(define_predicate "combine_pseudo_register_operand"
+ (ior (match_operand 0 "pseudo_register_operand")
+ (and (match_operand 0 "register_operand")
+ (match_test "reload_completed || reload_in_progress"))))
;; Return true if OP is a constant integer that is either
;; 8 or 16 or 24.
@@ -189,3 +213,18 @@ (define_predicate "s9_operand"
(define_predicate "register_or_s9_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "s9_operand")))
+
+;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65535.
+(define_predicate "u16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)")))
+
+;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767.
+(define_predicate "s16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)")))
+
+;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1.
+(define_predicate "o16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)")))
===================================================================
@@ -72,10 +72,11 @@ see the files COPYING3 and COPYING.RUNTI
.endm
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
- Multiplication 8 x 8
+ Multiplication 8 x 8 without MUL
*******************************************************/
#if defined (L_mulqi3)
@@ -83,9 +84,7 @@ see the files COPYING3 and COPYING.RUNTI
#define r_arg1 r24 /* multiplier */
#define r_res __tmp_reg__ /* result */
- .global __mulqi3
- .func __mulqi3
-__mulqi3:
+DEFUN __mulqi3
clr r_res ; clear result
__mulqi3_loop:
sbrc r_arg1,0
@@ -97,18 +96,16 @@ __mulqi3_loop:
__mulqi3_exit:
mov r_arg1,r_res ; result to return register
ret
+ENDF __mulqi3
#undef r_arg2
#undef r_arg1
#undef r_res
-.endfunc
#endif /* defined (L_mulqi3) */
#if defined (L_mulqihi3)
- .global __mulqihi3
- .func __mulqihi3
-__mulqihi3:
+DEFUN __mulqihi3
clr r25
sbrc r24, 7
dec r25
@@ -116,21 +113,19 @@ __mulqihi3:
sbrc r22, 7
dec r22
rjmp __mulhi3
- .endfunc
+ENDF __mulqihi3
#endif /* defined (L_mulqihi3) */
#if defined (L_umulqihi3)
- .global __umulqihi3
- .func __umulqihi3
-__umulqihi3:
+DEFUN __umulqihi3
clr r25
clr r23
rjmp __mulhi3
- .endfunc
+ENDF __umulqihi3
#endif /* defined (L_umulqihi3) */
/*******************************************************
- Multiplication 16 x 16
+ Multiplication 16 x 16 without MUL
*******************************************************/
#if defined (L_mulhi3)
#define r_arg1L r24 /* multiplier Low */
@@ -140,9 +135,7 @@ __umulqihi3:
#define r_resL __tmp_reg__ /* result Low */
#define r_resH r21 /* result High */
- .global __mulhi3
- .func __mulhi3
-__mulhi3:
+DEFUN __mulhi3
clr r_resH ; clear result
clr r_resL ; clear result
__mulhi3_loop:
@@ -166,6 +159,7 @@ __mulhi3_exit:
mov r_arg1H,r_resH ; result to return register
mov r_arg1L,r_resL
ret
+ENDF __mulhi3
#undef r_arg1L
#undef r_arg1H
@@ -174,168 +168,51 @@ __mulhi3_exit:
#undef r_resL
#undef r_resH
-.endfunc
#endif /* defined (L_mulhi3) */
-#endif /* !defined (__AVR_HAVE_MUL__) */
/*******************************************************
- Widening Multiplication 32 = 16 x 16
+ Widening Multiplication 32 = 16 x 16 without MUL
*******************************************************/
-
+
#if defined (L_mulhisi3)
DEFUN __mulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
- ; C = (signed)A1 * (signed)B1
- muls A1, B1
- movw C2, R0
-
- ; C += A0 * B0
- mul A0, B0
- movw C0, R0
-
- ; C += (signed)A1 * B0
- mulsu A1, B0
- sbci C3, 0
- add C1, R0
- adc C2, R1
- clr __zero_reg__
- adc C3, __zero_reg__
-
- ; C += (signed)B1 * A0
- mulsu B1, A0
- sbci C3, 0
- XJMP __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- sbrc r23, 7
- dec r24
- mov r25, r24
- clr r20
- sbrc r19, 7
- dec r20
- mov r21, r20
- XJMP __mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ sbrc r23, 7
+ dec r24
+ mov r25, r24
+ clr r20
+ sbrc r19, 7
+ dec r20
+ mov r21, r20
+ XJMP __mulsi3
ENDF __mulhisi3
#endif /* defined (L_mulhisi3) */
#if defined (L_umulhisi3)
DEFUN __umulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
- ; C = A1 * B1
- mul A1, B1
- movw C2, R0
-
- ; C += A0 * B0
- mul A0, B0
- movw C0, R0
-
- ; C += A1 * B0
- mul A1, B0
- add C1, R0
- adc C2, R1
- clr __zero_reg__
- adc C3, __zero_reg__
-
- ; C += B1 * A0
- mul B1, A0
- XJMP __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- clr r25
- clr r20
- clr r21
- XJMP __mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ clr r25
+ mov_l r20, r24
+ mov_h r21, r25
+ XJMP __mulsi3
ENDF __umulhisi3
#endif /* defined (L_umulhisi3) */
-#if defined (L_xmulhisi3_exit)
-
-;;; Helper for __mulhisi3 resp. __umulhisi3.
-
-#define C0 22
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-DEFUN __xmulhisi3_exit
- add C1, R0
- adc C2, R1
- clr __zero_reg__
- adc C3, __zero_reg__
- ret
-ENDF __xmulhisi3_exit
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#endif /* defined (L_xmulhisi3_exit) */
-
#if defined (L_mulsi3)
/*******************************************************
- Multiplication 32 x 32
+ Multiplication 32 x 32 without MUL
*******************************************************/
#define r_arg1L r22 /* multiplier Low */
#define r_arg1H r23
#define r_arg1HL r24
#define r_arg1HH r25 /* multiplier High */
-
#define r_arg2L r18 /* multiplicand Low */
#define r_arg2H r19
#define r_arg2HL r20
@@ -346,43 +223,7 @@ ENDF __xmulhisi3_exit
#define r_resHL r30
#define r_resHH r31 /* result High */
-
- .global __mulsi3
- .func __mulsi3
-__mulsi3:
-#if defined (__AVR_HAVE_MUL__)
- mul r_arg1L, r_arg2L
- movw r_resL, r0
- mul r_arg1H, r_arg2H
- movw r_resHL, r0
- mul r_arg1HL, r_arg2L
- add r_resHL, r0
- adc r_resHH, r1
- mul r_arg1L, r_arg2HL
- add r_resHL, r0
- adc r_resHH, r1
- mul r_arg1HH, r_arg2L
- add r_resHH, r0
- mul r_arg1HL, r_arg2H
- add r_resHH, r0
- mul r_arg1H, r_arg2HL
- add r_resHH, r0
- mul r_arg1L, r_arg2HH
- add r_resHH, r0
- clr r_arg1HH ; use instead of __zero_reg__ to add carry
- mul r_arg1H, r_arg2L
- add r_resH, r0
- adc r_resHL, r1
- adc r_resHH, r_arg1HH ; add carry
- mul r_arg1L, r_arg2H
- add r_resH, r0
- adc r_resHL, r1
- adc r_resHH, r_arg1HH ; add carry
- movw r_arg1L, r_resL
- movw r_arg1HL, r_resHL
- clr r1 ; __zero_reg__ clobbered by "mul"
- ret
-#else
+DEFUN __mulsi3
clr r_resHH ; clear result
clr r_resHL ; clear result
clr r_resH ; clear result
@@ -414,13 +255,13 @@ __mulsi3_exit:
mov_h r_arg1H,r_resH
mov_l r_arg1L,r_resL
ret
-#endif /* defined (__AVR_HAVE_MUL__) */
+ENDF __mulsi3
+
#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
-
#undef r_arg2L
#undef r_arg2H
#undef r_arg2HL
@@ -431,9 +272,183 @@ __mulsi3_exit:
#undef r_resHL
#undef r_resHH
-.endfunc
#endif /* defined (L_mulsi3) */
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16
+*******************************************************/
+
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+ XCALL __umulhisi3
+ ;; Sign-extend B
+ tst B1
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: ;; Sign-extend A
+ XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+ XCALL __umulhisi3
+ ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+ ;; Sign-extend A
+ sbrs A1, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+ mul A0, B0
+ movw C0, r0
+ mul A1, B1
+ movw C2, r0
+ mul A0, B1
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ mul A1, B0
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 32
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Some cores have problem skipping 2-word instruction
+ tst A1
+ brmi __mulohisi3
+#else
+ sbrs A1, 7
+#endif /* __AVR_HAVE_JMP_CALL__ */
+ XJMP __muluhisi3
+ ;; FALLTHRU
+ENDF __mulshisi3
+
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+ XCALL __muluhisi3
+ ;; One-extend R27:R26 (A1:A0)
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+ XCALL __umulhisi3
+ mul A0, B3
+ add C3, r0
+ mul A1, B2
+ add C3, r0
+ mul A0, B2
+ add C2, r0
+ adc C3, r1
+ clr __zero_reg__
+ ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+ Multiplication 32 x 32
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0 * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+ movw A0, C0
+ push C2
+ push C3
+ XCALL __muluhisi3
+ pop A1
+ pop A0
+ ;; A1:A0 now contains the high word of A
+ mul A0, B0
+ add C2, r0
+ adc C3, r1
+ mul A0, B1
+ add C3, r0
+ mul A1, B0
+ add C3, r0
+ clr __zero_reg__
+ ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
/*******************************************************
Division 8 / 8 => (result + remainder)
*******************************************************/
===================================================================
@@ -126,12 +126,25 @@ (define_attr "length" ""
(const_int 2))]
(const_int 2)))
-;; Define mode iterator
+;; Define mode iterators
+(define_mode_iterator QIHI [(QI "") (HI "")])
+(define_mode_iterator QIHI2 [(QI "") (HI "")])
(define_mode_iterator QISI [(QI "") (HI "") (SI "")])
(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
(define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
(define_mode_iterator HISI [(HI "") (SI "")])
+;; Define code iterators
+;; Define two incarnations so that we can build the cross product.
+(define_code_iterator any_extend [sign_extend zero_extend])
+(define_code_iterator any_extend2 [sign_extend zero_extend])
+
+;; Define code attributes
+(define_code_attr extend_prefix
+ [(sign_extend "s")
+ (zero_extend "u")])
+
+
;;========================================================================
;; The following is used by nonlocal_goto and setjmp.
;; The receiver pattern will create no instructions since internally
@@ -1349,69 +1362,310 @@ (define_insn "*mulhi3_call"
;; Operand 2 (reg:SI 18) not clobbered on the enhanced core.
;; All call-used registers clobbered otherwise - normal library call.
+;; To support widening multiplication with a constant we postpone
+;; expanding to the implicit library call until post combine and
+;; prior to register allocation.  Clobber all hard registers that
+;; might be used by the (widening) multiply until it is split and
+;; its final register footprint is worked out.
+
(define_expand "mulsi3"
- [(set (reg:SI 22) (match_operand:SI 1 "register_operand" ""))
- (set (reg:SI 18) (match_operand:SI 2 "register_operand" ""))
- (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))])
- (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))]
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
- "")
+ {
+ if (u16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
-(define_insn "*mulsi3_call"
- [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))]
- "AVR_HAVE_MUL"
- "%~call __mulsi3"
- [(set_attr "type" "xcall")
- (set_attr "cc" "clobber")])
+ if (o16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+ })
-(define_expand "mulhisi3"
- [(set (reg:HI 18)
- (match_operand:HI 1 "register_operand" ""))
- (set (reg:HI 20)
- (match_operand:HI 2 "register_operand" ""))
+(define_insn_and_split "*mulsi3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r")
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:SI 18)
+ (match_dup 1))
(set (reg:SI 22)
- (mult:SI (sign_extend:SI (reg:HI 18))
- (sign_extend:SI (reg:HI 20))))
- (set (match_operand:SI 0 "register_operand" "")
+ (match_dup 2))
+ (parallel [(set (reg:SI 22)
+ (mult:SI (reg:SI 22)
+ (reg:SI 18)))
+ (clobber (reg:HI 26))])
+ (set (match_dup 0)
+ (reg:SI 22))]
+ {
+ if (u16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+
+ if (o16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+ })
+
+;; "muluqisi3"
+;; "muluhisi3"
+(define_insn_and_split "mulu<mode>si3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:HI 26)
+ (match_dup 1))
+ (set (reg:SI 18)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (zero_extend:SI (reg:HI 26))
+ (reg:SI 18)))
+ (set (match_dup 0)
+ (reg:SI 22))]
+ {
+ /* Do the QI -> HI extension explicitly before the multiplication. */
+ /* Do the HI -> SI extension implicitly and after the multiplication. */
+
+ if (QImode == <MODE>mode)
+ operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]);
+
+ if (u16_operand (operands[2], SImode))
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ })
+
+;; "mulsqisi3"
+;; "mulshisi3"
+(define_insn_and_split "muls<mode>si3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:HI 26)
+ (match_dup 1))
+ (set (reg:SI 18)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (sign_extend:SI (reg:HI 26))
+ (reg:SI 18)))
+ (set (match_dup 0)
+ (reg:SI 22))]
+ {
+ /* Do the QI -> HI extension explicitly before the multiplication. */
+ /* Do the HI -> SI extension implicitly and after the multiplication. */
+
+ if (QImode == <MODE>mode)
+ operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]);
+
+ if (u16_operand (operands[2], SImode)
+ || s16_operand (operands[2], SImode))
+ {
+ rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+
+ operands[1] = force_reg (HImode, operands[1]);
+
+ if (u16_operand (operands[2], SImode))
+ emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1]));
+ else
+ emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2));
+
+ DONE;
+ }
+ })
+
+;; One-extend operand 1
+
+(define_insn_and_split "mulohisi3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (not:SI (zero_extend:SI
+ (not:HI (match_operand:HI 1 "pseudo_register_operand" "r"))))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:HI 26)
+ (match_dup 1))
+ (set (reg:SI 18)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+ (reg:SI 18)))
+ (set (match_dup 0)
(reg:SI 22))]
+ "")
+
+(define_expand "mulhisi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+ (clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
"")
(define_expand "umulhisi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" ""))))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ "")
+
+(define_expand "usmulhisi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ "")
+
+;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3"
+;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3"
+;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3"
+;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3"
+(define_insn_and_split
+ "*<any_extend:extend_prefix><any_extend2:extend_prefix>mul<QIHI:mode><QIHI2:mode>si3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+ (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r"))))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
[(set (reg:HI 18)
- (match_operand:HI 1 "register_operand" ""))
- (set (reg:HI 20)
- (match_operand:HI 2 "register_operand" ""))
- (set (reg:SI 22)
- (mult:SI (zero_extend:SI (reg:HI 18))
- (zero_extend:SI (reg:HI 20))))
- (set (match_operand:SI 0 "register_operand" "")
+ (match_dup 1))
+ (set (reg:HI 26)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (match_dup 3)
+ (match_dup 4)))
+ (set (match_dup 0)
(reg:SI 22))]
+ {
+ rtx xop1 = operands[1];
+ rtx xop2 = operands[2];
+
+ /* Do the QI -> HI extension explicitly before the multiplication. */
+ /* Do the HI -> SI extension implicitly and after the multiplication. */
+
+ if (QImode == <QIHI:MODE>mode)
+ xop1 = gen_rtx_fmt_e (<any_extend:CODE>, HImode, xop1);
+
+ if (QImode == <QIHI2:MODE>mode)
+ xop2 = gen_rtx_fmt_e (<any_extend2:CODE>, HImode, xop2);
+
+ if (<any_extend:CODE> == <any_extend2:CODE>
+ || <any_extend:CODE> == ZERO_EXTEND)
+ {
+ operands[1] = xop1;
+ operands[2] = xop2;
+ operands[3] = gen_rtx_fmt_e (<any_extend:CODE>, SImode, gen_rtx_REG (HImode, 18));
+ operands[4] = gen_rtx_fmt_e (<any_extend2:CODE>, SImode, gen_rtx_REG (HImode, 26));
+ }
+ else
+ {
+ /* <any_extend:CODE> = SIGN_EXTEND */
+ /* <any_extend2:CODE> = ZERO_EXTEND */
+
+ operands[1] = xop2;
+ operands[2] = xop1;
+ operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18));
+ operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26));
+ }
+ })
+
+(define_insn "*mulsi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (reg:SI 22)
+ (reg:SI 18)))
+ (clobber (reg:HI 26))]
"AVR_HAVE_MUL"
- "")
+ "%~call __mulsi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
(define_insn "*mulhisi3_call"
- [(set (reg:SI 22)
+ [(set (reg:SI 22)
(mult:SI (sign_extend:SI (reg:HI 18))
- (sign_extend:SI (reg:HI 20))))]
+ (sign_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __mulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
(define_insn "*umulhisi3_call"
- [(set (reg:SI 22)
+ [(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 18))
- (zero_extend:SI (reg:HI 20))))]
+ (zero_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __umulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
+(define_insn "*usmulhisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (zero_extend:SI (reg:HI 18))
+ (sign_extend:SI (reg:HI 26))))]
+ "AVR_HAVE_MUL"
+ "%~call __usmulhisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*muluhisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (zero_extend:SI (reg:HI 26))
+ (reg:SI 18)))]
+ "AVR_HAVE_MUL"
+ "%~call __muluhisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*mulshisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (sign_extend:SI (reg:HI 26))
+ (reg:SI 18)))]
+ "AVR_HAVE_MUL"
+ "%~call __mulshisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*mulohisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+ (reg:SI 18)))]
+ "AVR_HAVE_MUL"
+ "%~call __mulohisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / %
; divmod
@@ -2399,9 +2653,16 @@ (define_insn "one_cmplsi2"
;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
;; sign extend
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends. A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't be combined to a widening
+;; multiplication. There is no need for combine to propagate hard registers,
+;; register allocation can do it just as well.
+
(define_insn "extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r,r")
- (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))]
+ (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %B0\;sbrc %0,7\;com %B0
@@ -2411,7 +2672,7 @@ (define_insn "extendqihi2"
(define_insn "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))]
+ (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0
@@ -2420,8 +2681,8 @@ (define_insn "extendqisi2"
(set_attr "cc" "set_n,set_n")])
(define_insn "extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=r,&r")
- (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))]
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0
@@ -2438,7 +2699,7 @@ (define_insn "extendhisi2"
(define_insn_and_split "zero_extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r")
- (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
@@ -2454,7 +2715,7 @@ (define_insn_and_split "zero_extendqihi2
(define_insn_and_split "zero_extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
@@ -2469,8 +2730,8 @@ (define_insn_and_split "zero_extendqisi2
})
(define_insn_and_split "zero_extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
===================================================================
@@ -41,7 +41,9 @@ LIB1ASMFUNCS = \
_mulhi3 \
_mulhisi3 \
_umulhisi3 \
- _xmulhisi3_exit \
+ _usmulhisi3 \
+ _muluhisi3 \
+ _mulshisi3 \
_mulsi3 \
_udivmodqi4 \
_divmodqi4 \
===================================================================
@@ -5512,6 +5512,34 @@ avr_rtx_costs (rtx x, int codearg, int o
return false;
break;
+ case SImode:
+ if (AVR_HAVE_MUL)
+ {
+ if (!speed)
+ {
+ /* Add some additional costs besides CALL like moves etc. */
+
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+ }
+ else
+ {
+ /* Just a rough estimate. Even with -O2 we don't want bulky
+ code expanded inline. */
+
+ *total = COSTS_N_INSNS (25);
+ }
+ }
+ else
+ {
+ if (speed)
+ *total = COSTS_N_INSNS (300);
+ else
+ /* Add some additional costs besides CALL like moves etc. */
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+ }
+
+ return true;
+
default:
return false;
}