@@ -11705,18 +11705,19 @@ (define_expand "and<mode>3"
(operands[0], gen_lowpart (mode, operands[1]),
<MODE>mode, mode, 1));
else
- ix86_expand_binary_operator (AND, <MODE>mode, operands);
+ ix86_expand_binary_operator (AND, <MODE>mode, operands,
+ TARGET_APX_NDD);
DONE;
})
(define_insn_and_split "*and<dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(and:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (AND, <DWI>mode, operands, TARGET_APX_NDD)"
"#"
"&& reload_completed"
[(const_int:DWIH 0)]
@@ -11728,39 +11729,53 @@ (define_insn_and_split "*and<dwi>3_doubleword"
if (operands[2] == const0_rtx)
emit_move_insn (operands[0], const0_rtx);
else if (operands[2] == constm1_rtx)
- emit_insn_deleted_note_p = true;
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn_deleted_note_p = true;
+ }
else
- ix86_expand_binary_operator (AND, <MODE>mode, &operands[0]);
+ ix86_expand_binary_operator (AND, <MODE>mode, &operands[0],
+ TARGET_APX_NDD);
if (operands[5] == const0_rtx)
emit_move_insn (operands[3], const0_rtx);
else if (operands[5] == constm1_rtx)
{
- if (emit_insn_deleted_note_p)
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ else if (emit_insn_deleted_note_p)
emit_note (NOTE_INSN_DELETED);
}
else
- ix86_expand_binary_operator (AND, <MODE>mode, &operands[3]);
+ ix86_expand_binary_operator (AND, <MODE>mode, &operands[3],
+ TARGET_APX_NDD);
DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,?k")
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,r,qm,k")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,re,m,L,k")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands,
+ TARGET_APX_NDD)"
"@
and{l}\t{%k2, %k0|%k0, %k2}
+ and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
and{q}\t{%2, %0|%0, %2}
and{q}\t{%2, %0|%0, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
#
#"
- [(set_attr "isa" "x64,x64,x64,x64,avx512bw_512")
- (set_attr "type" "alu,alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,*,0,*")
+ [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,x64,avx512bw_512")
+ (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11768,7 +11783,7 @@ (define_insn "*anddi_1"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI,DI,DI,SI,DI")])
+ (set_attr "mode" "SI,SI,DI,DI,DI,DI,SI,DI")])
(define_insn_and_split "*anddi_1_btr"
[(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -11823,36 +11838,45 @@ (define_split
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
- "and{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands,
+ TARGET_APX_NDD)"
+ "@
+ and{l}\t{%2, %k0|%k0, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*and<mode>_1"
- [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
- (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
- (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,L,k")))
+ [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,Ya,?k")
+ (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,r,qm,k")
+ (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,L,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
"@
and{<imodesuffix>}\t{%2, %0|%0, %2}
and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#
#"
[(set (attr "isa")
- (cond [(eq_attr "alternative" "3")
+ (cond [(eq_attr "alternative" "2,3")
+ (const_string "apx_ndd")
+ (eq_attr "alternative" "5")
(if_then_else (eq_attr "mode" "SI")
(const_string "avx512bw")
(const_string "avx512f"))
]
(const_string "*")))
- (set_attr "type" "alu,alu,imovx,msklog")
- (set_attr "length_immediate" "*,*,0,*")
+ (set_attr "type" "alu,alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -11860,24 +11884,27 @@ (define_insn "*and<mode>_1"
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
+ (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
- (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (AND, QImode, operands)"
+ "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)"
"@
and{b}\t{%2, %0|%0, %2}
and{b}\t{%2, %0|%0, %2}
and{l}\t{%k2, %k0|%k0, %k2}
+ and{b}\t{%2, %1, %0|%0, %1, %2}
+ and{b}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -11980,7 +12007,10 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!REG_P (operands[1])
- || REGNO (operands[0]) != REGNO (operands[1]))"
+ || REGNO (operands[0]) != REGNO (operands[1]))
+ && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
+ || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
+ || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
[(const_int 0)]
{
unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
@@ -12053,10 +12083,10 @@ (define_insn "*anddi_2"
[(set (reg FLAGS_REG)
(compare
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
(const_int 0)))
- (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
(and:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT
&& ix86_match_ccmode
@@ -12070,38 +12100,46 @@ (define_insn "*anddi_2"
&& (!CONST_INT_P (operands[2])
|| val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
? CCZmode : CCNOmode)
- && ix86_binary_operator_ok (AND, DImode, operands)"
+ && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
"@
and{l}\t{%k2, %k0|%k0, %k2}
and{q}\t{%2, %0|%0, %2}
- and{q}\t{%2, %0|%0, %2}"
+ and{q}\t{%2, %0|%0, %2}
+ and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}
+ and{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI,DI,DI")])
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
+ (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*andsi_2_zext"
[(set (reg FLAGS_REG)
(compare (and:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (AND, SImode, operands)"
- "and{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
+ "@
+ and{l}\t{%2, %k0|%k0, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}
+ and{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*andqi_2_maybe_si"
[(set (reg FLAGS_REG)
(compare (and:QI
- (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qn,m,n"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
+ (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
(const_int 0)))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
(and:QI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (AND, QImode, operands)
+ "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
&& ix86_match_ccmode (insn,
CONST_INT_P (operands[2])
&& INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
@@ -12112,11 +12150,16 @@ (define_insn "*andqi_2_maybe_si"
operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
return "and{l}\t{%2, %k0|%k0, %2}";
}
+ if (which_alternative > 2)
+ return "and{b}\t{%2, %1, %0|%0, %1, %2}";
return "and{b}\t{%2, %0|%0, %2}";
}
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
(set (attr "mode")
- (cond [(eq_attr "alternative" "2")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "QI")
+ (eq_attr "alternative" "2")
(const_string "SI")
(and (match_test "optimize_insn_for_size_p ()")
(and (match_operand 0 "ext_QIreg_operand")
@@ -12133,15 +12176,21 @@ (define_insn "*andqi_2_maybe_si"
(define_insn "*and<mode>_2"
[(set (reg FLAGS_REG)
(compare (and:SWI124
- (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(and:SWI124 (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
- "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (AND, <MODE>mode, operands,
+ TARGET_APX_NDD)"
+ "@
+ and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %0|%0, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,apx_ndd,apx_ndd")
(set_attr "mode" "<MODE>")])
(define_insn "*<code>qi_ext<mode>_0"
@@ -12387,6 +12436,7 @@ (define_insn_and_split "*<code>qi_ext<mode>_3"
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls. We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
+;; Don't do the splitting for APX NDD as NDD does not support *h registers.
(define_split
[(set (match_operand:SWI248 0 "QIreg_operand")
(and:SWI248 (match_operand:SWI248 1 "register_operand")
@@ -12394,7 +12444,8 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(~INTVAL (operands[2]) & ~(255 << 8))"
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
[(parallel
[(set (zero_extract:HI (match_dup 0)
(const_int 8)
@@ -12423,7 +12474,9 @@ (define_split
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(~INTVAL (operands[2]) & ~255)
- && !(INTVAL (operands[2]) & 128)"
+ && !(INTVAL (operands[2]) & 128)
+ && !(TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))"
[(parallel [(set (strict_low_part (match_dup 0))
(and:QI (match_dup 1)
(match_dup 2)))
@@ -85,6 +85,15 @@ F (int, not, ~)
F1 (int, not, ~)
F (long, not, ~)
F1 (long, not, ~)
+
+FOO (char, and, &)
+FOO1 (char, and, &)
+FOO (short, and, &)
+FOO1 (short, and, &)
+FOO (int, and, &)
+FOO1 (int, and, &)
+FOO (long, and, &)
+FOO1 (long, and, &)
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
@@ -95,3 +104,7 @@ F1 (long, not, ~)
/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "not(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
From: Kong Lingling <lingling.kong@intel.com> For NDD form AND insn, there are three splitter fixes after extending legacy patterns. 1. APX NDD does not support high QImode registers like ah, bh, ch, dh, so some optimization splitters that generate highpart zero_extract for QImode need to be prohibited under NDD pattern. 2. Legacy AND insn will use r/qm/L constraint, and a post-reload splitter will transform it into zero_extend move. But for NDD form AND, the splitter is not strict enough, as the splitter assumes such AND will have the const_int operand matching the constraint "L", whereas NDD form AND allows const_int with any QI values. Restrict the splitter condition to match "L" constraint that strictly matches zero-extend semantics. 3. Legacy AND insn will adopt r/0/Z constraint, a splitter will try to optimize such form into strict_lowpart QImode AND when 7th bit is not set. But the splitter will wrongly convert non-zext form of NDD AND with memory src, then the strict_lowpart transform matches alternative 1 of *<code><mode>_slp_1 and generates *movstrict<mode>_1 so the zext semantics are omitted. This could cause highpart of dest not cleared and generates wrong code. Disable the splitter when NDD adopted and operands[0] and operands[1] are not equal. gcc/ChangeLog: * config/i386/i386.md (and<mode>3): Add NDD alternatives and adjust output template. (*anddi_1): Likewise. (*and<mode>_1): Likewise. (*andqi_1): Likewise. (*andsi_1_zext): Likewise. (*anddi_2): Likewise. (*andsi_2_zext): Likewise. (*andqi_2_maybe_si): Likewise. (*and<mode>_2): Likewise. (*and<dwi>3_doubleword): Add NDD alternative, emit move for optimized case if operands[0] not equal to operands[1]. (define_split for QI highpart AND): Prohibit splitter to split NDD form AND insn to <any_logic:code>qi_ext<mode>_3. (define_split for QI strict_lowpart optimization): Prohibit splitter to split NDD form AND insn to *<code><mode>3_1_slp. 
(define_split for zero_extend and optimization): Prohibit splitter to split NDD form AND insn to zero_extend insn. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add and test. --- gcc/config/i386/i386.md | 175 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 13 ++ 2 files changed, 127 insertions(+), 61 deletions(-)