@@ -1274,6 +1274,8 @@ bool ix86_can_use_ndd_p (enum rtx_code code)
case NEG:
case NOT:
case AND:
+ case IOR:
+ case XOR:
return true;
default:
return false;
@@ -12372,17 +12372,19 @@ (define_expand "<code><mode>3"
&& !x86_64_hilo_general_operand (operands[2], <MODE>mode))
operands[2] = force_reg (<MODE>mode, operands[2]);
- ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>));
DONE;
})
(define_insn_and_split "*<code><dwi>3_doubleword"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,r")
(any_or:<DWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
- (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
+ (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands)"
+ "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
"#"
"&& reload_completed"
[(const_int:DWIH 0)]
@@ -12394,20 +12396,29 @@ (define_insn_and_split "*<code><dwi>3_doubleword"
split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
if (operands[2] == const0_rtx)
- emit_insn_deleted_note_p = true;
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn_deleted_note_p = true;
+ }
else if (operands[2] == constm1_rtx)
{
if (<CODE> == IOR)
emit_move_insn (operands[0], constm1_rtx);
else
- ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0]);
+ ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0],
+ ix86_can_use_ndd_p (NOT));
}
else
- ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0]);
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0],
+ ix86_can_use_ndd_p (<CODE>));
if (operands[5] == const0_rtx)
{
- if (emit_insn_deleted_note_p)
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ else if (emit_insn_deleted_note_p)
emit_note (NOTE_INSN_DELETED);
}
else if (operands[5] == constm1_rtx)
@@ -12415,37 +12426,44 @@ (define_insn_and_split "*<code><dwi>3_doubleword"
if (<CODE> == IOR)
emit_move_insn (operands[3], constm1_rtx);
else
- ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3]);
+ ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (NOT));
}
else
- ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3]);
+ ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3],
+ ix86_can_use_ndd_p (<CODE>));
DONE;
-})
+}
+[(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
(any_or:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
- (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k")))
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
+ (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
"@
<logic>{<imodesuffix>}\t{%2, %0|%0, %2}
<logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "isa" "*,*,<kmov_isa>")
- (set_attr "type" "alu, alu, msklog")
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
+ (set_attr "type" "alu, alu, alu, alu, msklog")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*notxor<mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
(not:SWI248
(xor:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
- (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
+ (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k"))))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (XOR, <MODE>mode, operands,
+ ix86_can_use_ndd_p (XOR))"
"#"
"&& reload_completed"
[(parallel
@@ -12461,8 +12479,8 @@ (define_insn_and_split "*notxor<mode>_1"
DONE;
}
}
- [(set_attr "isa" "*,*,<kmov_isa>")
- (set_attr "type" "alu, alu, msklog")
+ [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
+ (set_attr "type" "alu, alu, alu, alu, msklog")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*iordi_1_bts"
@@ -12550,44 +12568,56 @@ (define_insn_and_split "*xor2andn"
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI
- (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
+ (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code>si_1_zext_imm"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(any_or:DI
- (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
- (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code>qi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
- (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
+ (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, QImode, operands)"
+ "ix86_binary_operator_ok (<CODE>, QImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
"@
<logic>{b}\t{%2, %0|%0, %2}
<logic>{b}\t{%2, %0|%0, %2}
<logic>{l}\t{%k2, %k0|%k0, %k2}
+ <logic>{b}\t{%2, %1, %0|%0, %1, %2}
+ <logic>{b}\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "isa" "*,*,*,avx512f")
- (set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+ (set_attr "type" "alu,alu,alu,alu,alu,msklog")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -12599,12 +12629,13 @@ (define_insn "*<code>qi_1"
(symbol_ref "true")))])
(define_insn_and_split "*notxorqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
(not:QI
- (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
- (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
+ (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (XOR, QImode, operands)"
+ "ix86_binary_operator_ok (XOR, QImode, operands,
+ ix86_can_use_ndd_p (XOR))"
"#"
"&& reload_completed"
[(parallel
@@ -12620,12 +12651,12 @@ (define_insn_and_split "*notxorqi_1"
DONE;
}
}
- [(set_attr "isa" "*,*,*,avx512f")
- (set_attr "type" "alu,alu,alu,msklog")
+ [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
+ (set_attr "type" "alu,alu,alu,alu,alu,msklog")
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (and (eq_attr "alternative" "3")
+ (and (eq_attr "alternative" "5")
(match_test "!TARGET_AVX512DQ"))
(const_string "HI")
]
@@ -12673,44 +12704,59 @@ (define_split
(define_insn "*<code><mode>_2"
[(set (reg FLAGS_REG)
(compare (any_or:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
(any_or:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,*,apx_ndd,apx_ndd")
(set_attr "mode" "<MODE>")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
[(set (reg FLAGS_REG)
- (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
+ (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
+ (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code>si_2_zext_imm"
[(set (reg FLAGS_REG)
(compare (any_or:SI
- (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
+ (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
- && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<logic>{l}\t{%2, %k0|%k0, %2}"
+ && ix86_binary_operator_ok (<CODE>, SImode, operands,
+ ix86_can_use_ndd_p (<CODE>))"
+ "@
+ <logic>{l}\t{%2, %k0|%k0, %2}
+ <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "alu")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
(define_insn "*<code><mode>_3"
@@ -12731,6 +12777,7 @@ (define_insn "*<code><mode>_3"
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls. We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
+;; Don't do the splitting for APX NDD as NDD does not support *h registers.
(define_split
[(set (match_operand:SWI248 0 "QIreg_operand")
(any_or:SWI248 (match_operand:SWI248 1 "register_operand")
@@ -12738,7 +12785,8 @@ (define_split
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(INTVAL (operands[2]) & ~(255 << 8))"
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
[(parallel
[(set (zero_extract:HI (match_dup 0)
(const_int 8)
@@ -12776,7 +12824,9 @@ (define_split
"reload_completed
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(INTVAL (operands[2]) & ~255)
- && (INTVAL (operands[2]) & 128)"
+ && (INTVAL (operands[2]) & 128)
+ && !(TARGET_APX_NDD
+ && !rtx_equal_p (operands[0], operands[1]))"
[(parallel [(set (strict_low_part (match_dup 0))
(any_or:QI (match_dup 1)
(match_dup 2)))
@@ -94,6 +94,24 @@ FOO (int, and, &)
FOO1 (int, and, &)
FOO (long, and, &)
FOO1 (long, and, &)
+
+FOO (char, or, |)
+FOO1 (char, or, |)
+FOO (short, or, |)
+FOO1 (short, or, |)
+FOO (int, or, |)
+FOO1 (int, or, |)
+FOO (long, or, |)
+FOO1 (long, or, |)
+
+FOO (char, xor, ^)
+FOO1 (char, xor, ^)
+FOO (short, xor, ^)
+FOO1 (short, xor, ^)
+FOO (int, xor, ^)
+FOO1 (int, xor, ^)
+FOO (long, xor, ^)
+FOO1 (long, xor, ^)
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:l|w|q)\[^\n\r]%(?:|r|e)si, \\(%rdi\\), %(?:|r|e)ax" 4 } } */
@@ -108,3 +126,11 @@ FOO1 (long, and, &)
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "orb\[^\n\r]*1, \\(%rdi\\), %al" 2} } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 6 } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "xorb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
From: Kong Lingling <lingling.kong@intel.com> Similar to AND insn, two splitters need to be adjusted to prevent misoptimizaiton for NDD OR/XOR. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_can_use_ndd_p): Add IOR/XOR support. * config/i386/i386.md (<code><mode>3): Add NDD alternative and adjust output templates. (*<code><mode>_1): Likewise. (*<code>qi_1): Likewise. (*notxor<mode>_1): Likewise. (*<code>si_1_zext): Likewise. (*<code>si_1_zext_imm): Likewise. (*notxorqi_1): Likewise. (*<code><mode>_2): Likewise. (*<code>si_2_zext): Likewise. (*<code>si_2_zext_imm): Likewise. (*<code><dwi>3_doubleword): Add NDD constraints, emit move for optimized case if operands[0] != operands[1] or operands[4] != operands[5]. (define_split for QI highpart OR/XOR): Prohibit splitter to split NDD form OR/XOR insn to <any_logic:code>qi_ext<mode>_3. (define_split for QI strict_lowpart optimization): Prohibit splitter to split NDD form AND insn to *<code><mode>3_1_slp. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add or and xor test. --- gcc/config/i386/i386-expand.cc | 2 + gcc/config/i386/i386.md | 180 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 26 ++++ 3 files changed, 143 insertions(+), 65 deletions(-)