@@ -14780,9 +14780,32 @@
"! TARGET_POPCNT"
{
rtx scratch = gen_reg_rtx (QImode);
+ rtx hipart1 = gen_reg_rtx (SImode);
+ rtx lopart1 = gen_reg_rtx (SImode);
+ rtx xor1 = gen_reg_rtx (SImode);
+ rtx shift2 = gen_reg_rtx (SImode);
+ rtx hipart2 = gen_reg_rtx (HImode);
+ rtx lopart2 = gen_reg_rtx (HImode);
+ rtx xor2 = gen_reg_rtx (HImode);
- emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX,
- NULL_RTX, operands[1]));
+ if (TARGET_64BIT)
+ {
+ rtx shift1 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
+ emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
+ }
+ else
+ emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));
+
+ emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
+ emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));
+
+ emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
+ emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
+ emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
+ emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));
+
+ emit_insn (gen_parityhi2_cmp (xor2));
ix86_expand_setcc (scratch, ORDERED,
gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
@@ -14805,8 +14828,17 @@
"! TARGET_POPCNT"
{
rtx scratch = gen_reg_rtx (QImode);
+ rtx shift = gen_reg_rtx (SImode);
+ rtx hipart = gen_reg_rtx (HImode);
+ rtx lopart = gen_reg_rtx (HImode);
+ rtx tmp = gen_reg_rtx (HImode);
+
+ emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
+ emit_move_insn (hipart, gen_lowpart (HImode, shift));
+ emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
+ emit_insn (gen_xorhi3 (tmp, hipart, lopart));
- emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
+ emit_insn (gen_parityhi2_cmp (tmp));
ix86_expand_setcc (scratch, ORDERED,
gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
@@ -14815,70 +14847,128 @@
DONE;
})
-(define_insn_and_split "paritydi2_cmp"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:DI 3 "register_operand" "0")]
- UNSPEC_PARITY))
- (clobber (match_scratch:DI 0 "=r"))
- (clobber (match_scratch:SI 1 "=&r"))
- (clobber (match_scratch:HI 2 "=Q"))]
+;; Expand HImode parity for targets without POPCNT.  parityhi2_cmp sets
+;; the flags from the parity of its operand; ix86_expand_setcc is then
+;; called with the ORDERED condition on the flags register to produce a
+;; QImode value, which is zero-extended into operand 0.
+(define_expand "parityhi2"
+ [(set (match_operand:HI 0 "register_operand")
+ (parity:HI (match_operand:HI 1 "register_operand")))]
"! TARGET_POPCNT"
- "#"
- "&& reload_completed"
- [(parallel
- [(set (match_dup 1)
- (xor:SI (match_dup 1) (match_dup 4)))
- (clobber (reg:CC FLAGS_REG))])
- (parallel
- [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
- (clobber (match_dup 1))
- (clobber (match_dup 2))])]
{
- operands[4] = gen_lowpart (SImode, operands[3]);
+ rtx scratch = gen_reg_rtx (QImode);
+ rtx temp = gen_reg_rtx (HImode);
- if (TARGET_64BIT)
- {
- emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
- emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
- }
- else
- operands[1] = gen_highpart (SImode, operands[3]);
+ /* parityhi2_cmp clobbers its operand, so hand it a fresh copy
+ instead of the expander's input operand.  */
+ emit_move_insn (temp, operands[1]);
+ emit_insn (gen_parityhi2_cmp (temp));
+
+ ix86_expand_setcc (scratch, ORDERED,
+ gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
+
+ emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
+ DONE;
})
-(define_insn_and_split "paritysi2_cmp"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:SI 2 "register_operand" "0")]
- UNSPEC_PARITY))
- (clobber (match_scratch:SI 0 "=r"))
- (clobber (match_scratch:HI 1 "=&Q"))]
+;; Expand QImode parity for targets without POPCNT.  parityqi2_cmp sets
+;; the flags from the parity of operand 1 (that pattern has no clobber,
+;; so the input operand is passed directly), and ix86_expand_setcc is
+;; called with the ORDERED condition on the flags register and operand 0
+;; as its first argument.
+(define_expand "parityqi2"
+ [(set (match_operand:QI 0 "register_operand")
+ (parity:QI (match_operand:QI 1 "register_operand")))]
"! TARGET_POPCNT"
- "#"
- "&& reload_completed"
- [(parallel
- [(set (match_dup 1)
- (xor:HI (match_dup 1) (match_dup 3)))
- (clobber (reg:CC FLAGS_REG))])
- (parallel
- [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
- (clobber (match_dup 1))])]
{
- operands[3] = gen_lowpart (HImode, operands[2]);
+ emit_insn (gen_parityqi2_cmp (operands[1]));
- emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
- emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
+ ix86_expand_setcc (operands[0], ORDERED,
+ gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
+ DONE;
})
-(define_insn "*parityhi2_cmp"
+;; Set the flags from the parity of an HImode register.  The template
+;; is a byte xor between the %h0 and %b0 forms of operand 0, so the
+;; operand is both read and overwritten: hence the "+" modifier and the
+;; clobber of the same operand.  The "Q" constraint restricts operand 0
+;; to registers that have the %h0 form used by the template.
+(define_insn "parityhi2_cmp"
[(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:HI 1 "register_operand" "0")]
+ (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
UNSPEC_PARITY))
- (clobber (match_scratch:HI 0 "=Q"))]
- "! TARGET_POPCNT"
+ (clobber (match_dup 0))]
+ ""
"xor{b}\t{%h0, %b0|%b0, %h0}"
[(set_attr "length" "2")
- (set_attr "mode" "HI")])
+ (set_attr "mode" "QI")])
+
+;; Set the flags from the parity of a QImode register using a byte
+;; test of the register against itself; the operand is only read.
+;; The "q" constraint limits the operand to registers that have a
+;; low-byte form, which on 32-bit targets is not every general register.
+(define_insn "parityqi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
+ UNSPEC_PARITY))]
+ ""
+ "test{b}\t%b0, %b0"
+ [(set_attr "mode" "QI")])
+
+;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
+;; The parity insn clobbers operand 0, so the extended value cannot be
+;; used afterwards and the zero_extend can be dropped.  Because the
+;; extension leaves the high byte zero, the parity is taken directly
+;; from the QImode source (operand 1).
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand")
+ (zero_extend:HI (match_operand:QI 1 "register_operand")))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
+ (clobber (match_dup 0))])]
+ ""
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])
+
+;; Eliminate QImode popcount&1 using parity flag
+;; Matches four insns: a zero_extend of QImode operand 1 into operand 0,
+;; a popcount of operand 0 into operand 2, a test of bit 0 of operand 3,
+;; and a conditional jump on that test.  The condition requires
+;; operands 2 and 3 to be the same register and checks, via
+;; peep2_reg_dead_p / peep2_regno_dead_p, that operands 0 and 2 and the
+;; flags register are dead.  The replacement sets the flags from the
+;; parity unspec of operand 1 and jumps on a rewritten comparison code.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (zero_extend:SI (match_operand:QI 1 "register_operand")))
+ (parallel [(set (match_operand:SI 2 "register_operand")
+ (popcount:SI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
+ (const_int 1))
+ (const_int 0)))
+ (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
+ [(reg:CCZ FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_operand 5))
+ (pc)))]
+ "REGNO (operands[2]) == REGNO (operands[3])
+ && peep2_reg_dead_p (3, operands[0])
+ && peep2_reg_dead_p (3, operands[2])
+ && peep2_regno_dead_p (4, FLAGS_REG)"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
+ (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_dup 5))
+ (pc)))]
+{
+ /* Work on a copy so PUT_CODE does not modify the matched operator in
+ place.  EQ becomes UNORDERED; any other code becomes ORDERED.  */
+ operands[4] = shallow_copy_rtx (operands[4]);
+ PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
+})
+
+;; Eliminate HImode popcount&1 using parity flag
+;; Matches a popcount of HImode operand 2 into operand 1, a zero_extend
+;; of operand 1 into operand 3, a test of bit 0 of operand 4, and a
+;; conditional jump on that test.  The condition requires operands 3
+;; and 4 to be the same register and checks, via peep2_reg_dead_p /
+;; peep2_regno_dead_p, that operands 1 and 3 and the flags register are
+;; dead.  The parity pattern clobbers its operand, and operand 2 is a
+;; nonimmediate_operand, so it is first copied into the "Q" scratch
+;; (operand 0) and the parity is taken from that copy.
+(define_peephole2
+ [(match_scratch:HI 0 "Q")
+ (parallel [(set (match_operand:HI 1 "register_operand")
+ (popcount:HI
+ (match_operand:HI 2 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand 3 "register_operand")
+ (zero_extend (match_dup 1)))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
+ (const_int 1))
+ (const_int 0)))
+ (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
+ [(reg:CCZ FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_operand 6))
+ (pc)))]
+ "REGNO (operands[3]) == REGNO (operands[4])
+ && peep2_reg_dead_p (3, operands[1])
+ && peep2_reg_dead_p (3, operands[3])
+ && peep2_regno_dead_p (4, FLAGS_REG)"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
+ (clobber (match_dup 0))])
+ (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_dup 6))
+ (pc)))]
+{
+ /* Work on a copy so PUT_CODE does not modify the matched operator in
+ place.  EQ becomes UNORDERED; any other code becomes ORDERED.  */
+ operands[5] = shallow_copy_rtx (operands[5]);
+ PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
+})
;; Thread-local storage patterns for ELF.