===================================================================
@@ -317,7 +317,7 @@ (define_bypass 4 "power8-branch" "power8
; VS Unit (includes FP/VSX/VMX/DFP/Crypto)
(define_insn_reservation "power8-fp" 6
- (and (eq_attr "type" "fp,dmul")
+ (and (eq_attr "type" "fp,dmul,dfp")
(eq_attr "cpu" "power8"))
"DU_any_power8,VSU_power8")
===================================================================
@@ -500,7 +500,7 @@ (define_insn_reservation "power6-mtcr" 4
(define_bypass 9 "power6-mtcr" "power6-branch")
(define_insn_reservation "power6-fp" 6
- (and (eq_attr "type" "fp,dmul")
+ (and (eq_attr "type" "fp,dmul,dfp")
(eq_attr "cpu" "power6"))
"FPU_power6")
===================================================================
@@ -72,7 +72,8 @@ (define_insn "*tabort"
(set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"tabort. %0"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
+ (set_attr "power9_alu2" "yes")
(set_attr "length" "4")])
(define_expand "tabort<wd>c"
@@ -98,7 +99,8 @@ (define_insn "*tabort<wd>c"
(set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"tabort<wd>c. %0,%1,%2"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
+ (set_attr "power9_alu2" "yes")
(set_attr "length" "4")])
(define_expand "tabort<wd>ci"
@@ -124,7 +126,8 @@ (define_insn "*tabort<wd>ci"
(set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"tabort<wd>ci. %0,%1,%2"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
+ (set_attr "power9_alu2" "yes")
(set_attr "length" "4")])
(define_expand "tbegin"
@@ -146,7 +149,7 @@ (define_insn "*tbegin"
(set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"tbegin. %0"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
(set_attr "length" "4")])
(define_expand "tcheck"
@@ -208,7 +211,7 @@ (define_insn "*trechkpt"
(set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"trechkpt."
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
(set_attr "length" "4")])
(define_expand "treclaim"
@@ -230,7 +233,7 @@ (define_insn "*treclaim"
(set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"treclaim. %0"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
(set_attr "length" "4")])
(define_expand "tsr"
@@ -252,7 +255,7 @@ (define_insn "*tsr"
(set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"tsr. %0"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
(set_attr "length" "4")])
(define_expand "ttest"
@@ -272,7 +275,8 @@ (define_insn "*ttest"
(set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))]
"TARGET_HTM"
"tabortwci. 0,1,0"
- [(set_attr "type" "htm")
+ [(set_attr "type" "htmsimple")
+ (set_attr "power9_alu2" "yes")
(set_attr "length" "4")])
(define_insn "htm_mfspr_<mode>"
===================================================================
@@ -0,0 +1,525 @@
+;; Scheduling description for IBM POWER9 processor.
+;; Copyright (C) 2016 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power9dsp,power9lsu,power9vsu,power9misc")
+
+(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu")
+(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu")
+; Two vector permute units, part of vsu
+(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
+; Two fixed point divide units, not pipelined
+(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
+(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")
+
+(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,\
+ x2_power9,x3_power9,xb0_power9,xb1_power9,
+ br0_power9,br1_power9" "power9dsp")
+
+
+; Dispatch port reservations
+;
+; Power9 can dispatch a maximum of 6 iops per cycle with the following
+; general restrictions (other restrictions also apply):
+; 1) At most 2 iops per execution slice
+; 2) At most 2 iops to the branch unit
+; Note that insn position in a dispatch group of 6 insns does not imply which
+; execution slice the insn is routed to. The units are used to infer the
+; conflicts that exist (i.e. an 'even' requirement will preclude dispatch
+; with 2 insns with 'superslice' requirement).
+
+; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
+; are listed as separate units to allow those insns that preclude its use to
+; still be scheduled two to a superslice while reserving the 3rd slot. The
+; same applies for xb0/xb1.
+(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")
+(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")
+
+; Any execution slice dispatch
+(define_reservation "DU_any_power9"
+ "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9|\
+ DU_xb_power9")
+
+; Even slice, actually takes even/odd slots
+(define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9")
+
+; Slice plus 3rd slot
+(define_reservation "DU_slice_3_power9"
+ "x0_power9+xa0_power9|x1_power9+xa1_power9|\
+ x2_power9+xb0_power9|x3_power9+xb1_power9")
+
+; Superslice
+(define_reservation "DU_super_power9"
+ "x0_power9+x1_power9|x2_power9+x3_power9")
+
+; 2-way cracked
+(define_reservation "DU_C2_power9" "(x0_power9+x1_power9)|\
+ (x1_power9+DU_xa_power9)|\
+ (x1_power9+x2_power9)|\
+ (DU_xa_power9+x2_power9)|\
+ (x2_power9+x3_power9)|\
+ (x3_power9+DU_xb_power9)")
+
+; 2-way cracked plus 3rd slot
+(define_reservation "DU_C2_3_power9" "(x0_power9+x1_power9+xa0_power9)|\
+ (x1_power9+x2_power9+xa0_power9)|\
+ (x1_power9+x2_power9+xb0_power9)|\
+ (x2_power9+x3_power9+xb0_power9)")
+
+; 3-way cracked (consumes whole decode/dispatch cycle)
+(define_reservation "DU_C3_power9"
+ "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+\
+ x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")
+
+; Branch ports
+(define_reservation "DU_branch_power9" "br0_power9|br1_power9")
+
+
+; Execution unit reservations
+(define_reservation "LSU_power9"
+ "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")
+
+(define_reservation "LSU_pair_power9" ; NOTE(review): lsu3+lsu1 -- lsu3+lsu0 meant?
+ "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9|\
+ lsu2_power9+lsu3_power9|lsu3_power9+lsu1_power9")
+
+(define_reservation "VSU_power9"
+ "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")
+
+(define_reservation "VSU_super_power9"
+ "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")
+
+(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")
+
+
+; 2 cycle FP ops
+(define_attr "power9_fp_2cyc" "no,yes"
+ (cond [(eq_attr "mnemonic" "fabs,fcpsgn,fmr,fmrgow,fnabs,fneg,\
+ xsabsdp,xscpsgndp,xsnabsdp,xsnegdp,\
+ xsabsqp,xscpsgnqp,xsnabsqp,xsnegqp")
+ (const_string "yes")]
+ (const_string "no")))
+
+; Quad-precision FP ops, execute in DFU
+(define_attr "power9_qp" "no,yes"
+ (if_then_else (ior (match_operand:KF 0 "" "")
+ (match_operand:TF 0 "" "")
+ (match_operand:KF 1 "" "")
+ (match_operand:TF 1 "" ""))
+ (const_string "yes")
+ (const_string "no")))
+
+
+; LS Unit
+(define_insn_reservation "power9-load" 4
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "no")
+ (eq_attr "update" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_power9")
+
+(define_insn_reservation "power9-load-update" 4
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "no")
+ (eq_attr "update" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-load-ext" 6
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "yes")
+ (eq_attr "update" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,LSU_power9")
+
+(define_insn_reservation "power9-load-ext-update" 6
+ (and (eq_attr "type" "load")
+ (eq_attr "sign_extend" "yes")
+ (eq_attr "update" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-fpload-double" 4
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "no")
+ (match_operand:DF 0 "" "")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+(define_insn_reservation "power9-fpload-update-double" 4
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "yes")
+ (match_operand:DF 0 "" "")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+; SFmode loads are cracked and have additional 2 cycles over DFmode
+(define_insn_reservation "power9-fpload-single" 6
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "no")
+ (match_operand:SF 0 "" "")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9")
+
+(define_insn_reservation "power9-fpload-update-single" 6
+ (and (eq_attr "type" "fpload")
+ (eq_attr "update" "yes")
+ (match_operand:SF 0 "" "")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-vecload" 5
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_pair_power9")
+
+; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
+(define_insn_reservation "power9-store" 0
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "no") ; update and/or indexed forms are matched by
+ (eq_attr "indexed" "no") ; the dedicated power9-store-* reservations
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+(define_insn_reservation "power9-store-indexed" 0
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "no")
+ (eq_attr "indexed" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+; Update forms have 2 cycle latency for updated addr reg
+(define_insn_reservation "power9-store-update" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "yes")
+ (eq_attr "indexed" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+; Update forms have 2 cycle latency for updated addr reg
+(define_insn_reservation "power9-store-update-indexed" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "update" "yes")
+ (eq_attr "indexed" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-fpstore" 0
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "update" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,LSU_power9")
+
+; Update forms have 2 cycle latency for updated addr reg
+(define_insn_reservation "power9-fpstore-update" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "update" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-vecstore" 0
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,LSU_pair_power9")
+
+(define_insn_reservation "power9-larx" 4
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_power9")
+
+(define_insn_reservation "power9-stcx" 2
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_3_power9,LSU_power9+VSU_power9")
+
+(define_insn_reservation "power9-sync" 4
+ (and (eq_attr "type" "sync,isync")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,LSU_power9")
+
+
+; VSU Execution Unit
+
+; Fixed point ops
+
+; Most ALU insns are simple 2 cycle, including record form
+(define_insn_reservation "power9-alu" 2
+ (and (ior (eq_attr "type" "add,cmp,exts,integer,logical,trap,isel")
+ (and (eq_attr "type" "insert,shift")
+ (eq_attr "dot" "no")))
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Record form rotate/shift are cracked
+(define_insn_reservation "power9-cracked-alu" 2
+ (and (eq_attr "type" "insert,shift")
+ (eq_attr "dot" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,VSU_power9")
+; 4 cycle CR latency
+(define_bypass 4 "power9-cracked-alu"
+ "power9-crlogical,power9-mfcr,power9-mfcrf,power9-branch")
+
+(define_insn_reservation "power9-alu2" 3
+ (and (eq_attr "type" "cntlz,popcnt")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Treat 'two' and 'three' types as 2 or 3 way cracked
+(define_insn_reservation "power9-two" 4
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,VSU_power9")
+
+(define_insn_reservation "power9-three" 6
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,VSU_power9")
+
+(define_insn_reservation "power9-mul" 4
+ (and (eq_attr "type" "mul")
+ (eq_attr "dot" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-mul-compare" 4
+ (and (eq_attr "type" "mul")
+ (eq_attr "dot" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,VSU_power9")
+; 6 cycle CR latency
+(define_bypass 6 "power9-mul-compare"
+ "power9-crlogical,power9-mfcr,power9-mfcrf,power9-branch")
+
+; Fixed point divides reserve the divide units for a minimum of 8 cycles
+(define_insn_reservation "power9-idiv" 16
+ (and (eq_attr "type" "div")
+ (eq_attr "size" "32")
+ (eq_attr "cpu" "power9"))
+ "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
+
+(define_insn_reservation "power9-ldiv" 24
+ (and (eq_attr "type" "div")
+ (eq_attr "size" "64")
+ (eq_attr "cpu" "power9"))
+ "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
+
+(define_insn_reservation "power9-crlogical" 2
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-mfcrf" 2
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-mfcr" 6
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power9"))
+ "DU_C3_power9,VSU_power9")
+
+; Should differentiate between 1 cr field and > 1 since target of > 1 cr
+; is cracked
+(define_insn_reservation "power9-mtcr" 2
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Move to LR/CTR are executed in VSU
+(define_insn_reservation "power9-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; Floating point/Vector ops
+(define_insn_reservation "power9-fp" 7
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "power9_fp_2cyc" "no")
+ (eq_attr "power9_alu2" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-fp2" 2
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "power9_fp_2cyc" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-fp-alu2" 3
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "power9_alu2" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-fpcompare" 3
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+; FP div/sqrt are executed in VSU slices, not pipelined for other divides, but for the
+; most part do not block pipelined ops.
+(define_insn_reservation "power9-sdiv" 22
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-sqrt" 26
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-dsqrt" 36
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-veccmp" 3
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecsimple" 2
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "power9_alu2" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecsimple-alu2" 3
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "power9_alu2" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecnormal" 7
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "power9_fp_2cyc" "no")
+ (eq_attr "power9_alu2" "no")
+ (eq_attr "power9_qp" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecnormal2" 2
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "power9_fp_2cyc" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecnormal-alu2" 3
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "power9_alu2" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-qp" 12
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "power9_qp" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,dfu_power9")
+
+(define_insn_reservation "power9-vecperm" 2
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_PRM_power9")
+
+(define_insn_reservation "power9-veccomplex" 7
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecfdiv" 28
+ (and (eq_attr "type" "vecfdiv")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-vecdiv" 32
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "power9_qp" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,VSU_super_power9")
+
+(define_insn_reservation "power9-qpdiv" 56
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "power9_qp" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,dfu_power9")
+
+(define_insn_reservation "power9-mffgpr" 2
+ (and (eq_attr "type" "mffgpr")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+(define_insn_reservation "power9-mftgpr" 2
+ (and (eq_attr "type" "mftgpr")
+ (eq_attr "cpu" "power9"))
+ "DU_slice_3_power9,VSU_power9")
+
+
+; Branch Unit
+; Move from LR/CTR are executed in BRU but consume a writeback port from an
+; execution slice.
+(define_insn_reservation "power9-mfjmpr" 6
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "power9"))
+ "DU_branch_power9,bru_power9+VSU_power9")
+
+; Branch is 2 cycles
+(define_insn_reservation "power9-branch" 2
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power9"))
+ "DU_branch_power9,bru_power9")
+
+
+; Crypto Unit
+(define_insn_reservation "power9-crypto" 6
+ (and (eq_attr "type" "crypto")
+ (eq_attr "cpu" "power9"))
+ "DU_super_power9,cryptu_power9")
+
+
+; HTM Unit
+(define_insn_reservation "power9-htm" 6
+ (and (eq_attr "type" "htm")
+ (eq_attr "cpu" "power9"))
+ "DU_C2_power9,LSU_power9")
+
+(define_insn_reservation "power9-htm-simple" 2
+ (and (eq_attr "type" "htmsimple")
+ (eq_attr "power9_alu2" "no")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+(define_insn_reservation "power9-htm-simple-alu2" 3
+ (and (eq_attr "type" "htmsimple")
+ (eq_attr "power9_alu2" "yes")
+ (eq_attr "cpu" "power9"))
+ "DU_any_power9,VSU_power9")
+
+; DFP Unit
+(define_insn_reservation "power9-dfp" 12
+ (and (eq_attr "type" "dfp")
+ (eq_attr "cpu" "power9"))
+ "DU_even_power9,dfu_power9")
+
===================================================================
@@ -50,6 +50,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs
$(srcdir)/config/rs6000/power6.md \
$(srcdir)/config/rs6000/power7.md \
$(srcdir)/config/rs6000/power8.md \
+ $(srcdir)/config/rs6000/power9.md \
$(srcdir)/config/rs6000/cell.md \
$(srcdir)/config/rs6000/xfpu.md \
$(srcdir)/config/rs6000/a2.md \
===================================================================
@@ -292,7 +292,7 @@ (define_insn_reservation "power7-branch"
; VS Unit (includes FP/VSX/VMX/DFP)
(define_insn_reservation "power7-fp" 6
- (and (eq_attr "type" "fp,dmul")
+ (and (eq_attr "type" "fp,dmul,dfp")
(eq_attr "cpu" "power7"))
"DU_power7,VSU_power7")
===================================================================
@@ -1104,16 +1104,16 @@ struct processor_costs power9_cost = {
COSTS_N_INSNS (3), /* mulsi_const */
COSTS_N_INSNS (3), /* mulsi_const9 */
COSTS_N_INSNS (3), /* muldi */
- COSTS_N_INSNS (19), /* divsi */
- COSTS_N_INSNS (35), /* divdi */
+ COSTS_N_INSNS (8), /* divsi */
+ COSTS_N_INSNS (12), /* divdi */
COSTS_N_INSNS (3), /* fp */
COSTS_N_INSNS (3), /* dmul */
- COSTS_N_INSNS (14), /* sdiv */
- COSTS_N_INSNS (17), /* ddiv */
+ COSTS_N_INSNS (13), /* sdiv */
+ COSTS_N_INSNS (18), /* ddiv */
128, /* cache line size */
32, /* l1 cache */
- 256, /* l2 cache */
- 12, /* prefetch streams */
+ 512, /* l2 cache */
+ 8, /* prefetch streams */
COSTS_N_INSNS (3), /* SF->DF convert */
};
@@ -3841,22 +3841,7 @@ rs6000_option_override_internal (bool gl
if (rs6000_tune_index >= 0)
tune_index = rs6000_tune_index;
else if (have_cpu)
- {
- /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */
- if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9)
- rs6000_tune_index = tune_index = cpu_index;
- else
- {
- size_t i;
- tune_index = -1;
- for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
- if (processor_target_table[i].processor == PROCESSOR_POWER8)
- {
- rs6000_tune_index = tune_index = i;
- break;
- }
- }
- }
+ rs6000_tune_index = tune_index = cpu_index;
else
{
size_t i;
@@ -4636,8 +4621,7 @@ rs6000_option_override_internal (bool gl
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER7
- || rs6000_cpu == PROCESSOR_POWER8
- || rs6000_cpu == PROCESSOR_POWER9);
+ || rs6000_cpu == PROCESSOR_POWER8);
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
@@ -29825,13 +29809,19 @@ output_function_profiler (FILE *file, in
/* The following variable value is the last issued insn. */
-static rtx last_scheduled_insn;
+static rtx_insn * last_scheduled_insn;
/* The following variable helps to balance issuing of load and
store instructions */
static int load_store_pendulum;
+/* The following variables are used to keep track of various scheduling
+ information. */
+static int divCnt;
+static int vec_load_pendulum;
+
+
/* Power4 load update and store update instructions are cracked into a
load or store and an integer insn which are executed in the same cycle.
Branches have their own dispatch slot which does not count against the
@@ -29906,7 +29896,7 @@ rs6000_adjust_cost (rtx_insn *insn, rtx
some cycles later. */
/* Separate a load from a narrower, dependent store. */
- if (rs6000_sched_groups
+ if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (PATTERN (dep_insn)) == SET
&& GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
@@ -30144,6 +30134,8 @@ rs6000_adjust_cost (rtx_insn *insn, rtx
break;
}
}
+ /* Fall through, no cost for output dependency. */
+
case REG_DEP_ANTI:
/* Anti dependency; DEP_INSN reads a register that INSN writes some
cycles later. */
@@ -30516,8 +30508,9 @@ rs6000_issue_rate (void)
case CPU_POWER7:
return 5;
case CPU_POWER8:
- case CPU_POWER9:
return 7;
+ case CPU_POWER9:
+ return 6;
default:
return 1;
}
@@ -30675,6 +30668,28 @@ is_store_insn (rtx insn, rtx *str_mem)
return is_store_insn1 (PATTERN (insn), str_mem);
}
+/* Return true if TYPE is one of the Power9 pairable vector insn types. */
+
+static bool
+is_power9_pairable_vec_type (enum attr_type type)
+{
+ switch (type)
+ {
+ case TYPE_VECSIMPLE:
+ case TYPE_VECCOMPLEX:
+ case TYPE_VECDIV:
+ case TYPE_VECCMP:
+ case TYPE_VECPERM:
+ case TYPE_VECFLOAT:
+ case TYPE_VECFDIV:
+ case TYPE_VECDOUBLE:
+ return true;
+ default: /* Includes TYPE_VECLOAD, which callers test for separately. */
+ break;
+ }
+ return false;
+}
+
/* Returns whether the dependence between INSN and NEXT is considered
costly by the given target. */
@@ -30786,6 +30801,10 @@ static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
+ int pos;
+ int i;
+ rtx_insn *tmp;
+
if (sched_verbose)
fprintf (dump, "// rs6000_sched_reorder2 :\n");
@@ -30831,9 +30850,6 @@ rs6000_sched_reorder2 (FILE *dump, int s
*/
if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
{
- int pos;
- int i;
- rtx_insn *tmp;
rtx load_mem, str_mem;
if (is_store_insn (last_scheduled_insn, &str_mem))
@@ -30982,6 +30998,224 @@ rs6000_sched_reorder2 (FILE *dump, int s
}
}
+ /* Do Power9 dependent reordering if necessary. */
+ if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
+ && recog_memoized (last_scheduled_insn) >= 0)
+ {
+ enum attr_type type;
+
+ type = get_attr_type (last_scheduled_insn);
+
+ /* Try to issue fixed point divides back-to-back in pairs so they will
+ be routed to separate execution units and execute in parallel. */
+ if (type == TYPE_DIV && divCnt == 0)
+ {
+ /* First divide has been scheduled. */
+ divCnt = 1;
+
+ /* Scan the ready list looking for another divide, if found move it
+ to the end of the list so it is chosen next. */
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && get_attr_type (ready[pos]) == TYPE_DIV)
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ break;
+ }
+ pos--;
+ }
+ }
+ else
+ {
+ /* Last insn was the 2nd divide or not a divide, reset the counter. */
+ divCnt = 0;
+
+ /* Power9 can execute 2 vector operations and 2 vector loads in a
+ single cycle. So try to pair up and alternate groups of vector and
+ vector load instructions.
+
+ To aid this formation, a counter is maintained to keep track of
+ vec/vecload insns issued. The value of vec_load_pendulum maintains
+ the current state with the following values:
+
+ 0 : Initial state, no vec/vecload group has been started.
+
+ -1 : 1 vector load has been issued and another has been found on
+ the ready list and moved to the end.
+
+ -2 : 2 vector loads have been issued and a vector operation has
+ been found and moved to the end of the ready list.
+
+ -3 : 2 vector loads and a vector insn have been issued and a
+ vector operation has been found and moved to the end of the
+ ready list.
+
+ 1 : 1 vector insn has been issued and another has been found and
+ moved to the end of the ready list.
+
+ 2 : 2 vector insns have been issued and a vector load has been
+ found and moved to the end of the ready list.
+
+ 3 : 2 vector insns and a vector load have been issued and another
+ vector load has been found and moved to the end of the ready
+ list.
+ */
+ if (type == TYPE_VECLOAD)
+ {
+ /* Issued a vecload. */
+ if (vec_load_pendulum == 0)
+ {
+ /* We issued a single vecload, look for another and move to
+ the end of the ready list so it will be scheduled next.
+ Set pendulum if found. */
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && get_attr_type (ready[pos]) == TYPE_VECLOAD)
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ vec_load_pendulum = -1;
+ return cached_can_issue_more;
+ }
+ pos--;
+ }
+ }
+ else if (vec_load_pendulum == -1)
+ {
+ /* This is the second vecload we've issued, search the ready
+ list for a vector operation so we can try to schedule a
+ pair of those next. If found move to the end of the ready
+ list so it is scheduled next and set the pendulum. */
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && is_power9_pairable_vec_type (
+ get_attr_type (ready[pos])))
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ vec_load_pendulum = -2;
+ return cached_can_issue_more;
+ }
+ pos--;
+ }
+ }
+ else if (vec_load_pendulum == 2)
+ {
+ /* Two vector ops have been issued and we've just issued a
+ vecload, look for another vecload and move to end of ready
+ list if found. */
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && get_attr_type (ready[pos]) == TYPE_VECLOAD)
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ /* Set pendulum so that next vecload will be seen as
+ finishing a group, not start of one. */
+ vec_load_pendulum = 3;
+ return cached_can_issue_more;
+ }
+ pos--;
+ }
+ }
+ }
+ else if (is_power9_pairable_vec_type (type))
+ {
+ /* Issued a vector operation. */
+ if (vec_load_pendulum == 0)
+ /* We issued a single vec op, look for another and move to
+ the end of the ready list so it will be scheduled next.
+ Set pendulum if found. */
+ {
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && is_power9_pairable_vec_type (
+ get_attr_type (ready[pos])))
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ vec_load_pendulum = 1;
+ return cached_can_issue_more;
+ }
+ pos--;
+ }
+ }
+ else if (vec_load_pendulum == 1)
+ {
+ /* This is the second vec op we've issued, search the ready
+ list for a vecload operation so we can try to schedule a
+ pair of those next. If found move to the end of the ready
+ list so it is scheduled next and set the pendulum. */
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && get_attr_type (ready[pos]) == TYPE_VECLOAD)
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ vec_load_pendulum = 2;
+ return cached_can_issue_more;
+ }
+ pos--;
+ }
+ }
+ else if (vec_load_pendulum == -2)
+ {
+ /* Two vecload ops have been issued and we've just issued a
+ vec op, look for another vec op and move to end of ready
+ list if found. */
+ pos = *pn_ready-1;
+ while (pos >= 0)
+ {
+ if (recog_memoized (ready[pos]) >= 0
+ && is_power9_pairable_vec_type (
+ get_attr_type (ready[pos])))
+ {
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ /* Set pendulum so that next vec op will be seen as
+ finishing a group, not start of one. */
+ vec_load_pendulum = -3;
+ return cached_can_issue_more;
+ }
+ pos--;
+ }
+ }
+ }
+ }
+
+ /* We've either finished a vec/vecload group, couldn't find an insn to
+ continue the current group, or the last insn had nothing to do with
+ a group. In any case, reset the pendulum. */
+ vec_load_pendulum = 0;
+ }
+
return cached_can_issue_more;
}
@@ -31150,7 +31384,6 @@ insn_must_be_first_in_group (rtx_insn *i
}
break;
case PROCESSOR_POWER8:
- case PROCESSOR_POWER9:
type = get_attr_type (insn);
switch (type)
@@ -31281,7 +31514,6 @@ insn_must_be_last_in_group (rtx_insn *in
}
break;
case PROCESSOR_POWER8:
- case PROCESSOR_POWER9:
type = get_attr_type (insn);
switch (type)
@@ -31400,7 +31632,7 @@ force_new_group (int sched_verbose, FILE
/* Do we have a special group ending nop? */
if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
- || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_POWER9)
+ || rs6000_cpu_attr == CPU_POWER8)
{
nop = gen_group_ending_nop ();
emit_insn_before (nop, next_insn);
@@ -31654,8 +31886,10 @@ rs6000_sched_init (FILE *dump ATTRIBUTE_
int sched_verbose ATTRIBUTE_UNUSED,
int max_ready ATTRIBUTE_UNUSED)
{
- last_scheduled_insn = NULL_RTX;
+ last_scheduled_insn = NULL;
load_store_pendulum = 0;
+ divCnt = 0;
+ vec_load_pendulum = 0;
}
/* The following function is called at the end of scheduling BB.
@@ -31699,8 +31933,10 @@ rs6000_sched_finish (FILE *dump, int sch
struct _rs6000_sched_context
{
short cached_can_issue_more;
- rtx last_scheduled_insn;
+ rtx_insn * last_scheduled_insn;
int load_store_pendulum;
+ int divCnt;
+ int vec_load_pendulum;
};
typedef struct _rs6000_sched_context rs6000_sched_context_def;
@@ -31723,14 +31959,18 @@ rs6000_init_sched_context (void *_sc, bo
if (clean_p)
{
sc->cached_can_issue_more = 0;
- sc->last_scheduled_insn = NULL_RTX;
+ sc->last_scheduled_insn = NULL;
sc->load_store_pendulum = 0;
+ sc->divCnt = 0;
+ sc->vec_load_pendulum = 0;
}
else
{
sc->cached_can_issue_more = cached_can_issue_more;
sc->last_scheduled_insn = last_scheduled_insn;
sc->load_store_pendulum = load_store_pendulum;
+ sc->divCnt = divCnt;
+ sc->vec_load_pendulum = vec_load_pendulum;
}
}
@@ -31745,6 +31985,8 @@ rs6000_set_sched_context (void *_sc)
cached_can_issue_more = sc->cached_can_issue_more;
last_scheduled_insn = sc->last_scheduled_insn;
load_store_pendulum = sc->load_store_pendulum;
+ divCnt = sc->divCnt;
+ vec_load_pendulum = sc->vec_load_pendulum;
}
/* Free _SC. */
===================================================================
@@ -1252,6 +1252,7 @@ (define_insn "vsx_smax<mode>3"
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvmax<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_smin<mode>3"
@@ -1261,6 +1262,7 @@ (define_insn "*vsx_smin<mode>3"
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvmin<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_sqrt<mode>2"
@@ -1421,6 +1423,7 @@ (define_insn "vsx_eq<mode>"
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcmpeq<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_gt<mode>"
@@ -1430,6 +1433,7 @@ (define_insn "vsx_gt<mode>"
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcmpgt<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_ge<mode>"
@@ -1439,6 +1443,7 @@ (define_insn "*vsx_ge<mode>"
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcmpge<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")
(set_attr "fp_type" "<VSfptype_simple>")])
;; Compare vectors producing a vector result and a predicate, setting CR6 to
@@ -1454,7 +1459,8 @@ (define_insn "*vsx_eq_<mode>_p"
(match_dup 2)))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcmpeq<VSs>. %x0,%x1,%x2"
- [(set_attr "type" "<VStype_simple>")])
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*vsx_gt_<mode>_p"
[(set (reg:CC 74)
@@ -1467,7 +1473,8 @@ (define_insn "*vsx_gt_<mode>_p"
(match_dup 2)))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcmpgt<VSs>. %x0,%x1,%x2"
- [(set_attr "type" "<VStype_simple>")])
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*vsx_ge_<mode>_p"
[(set (reg:CC 74)
@@ -1480,7 +1487,8 @@ (define_insn "*vsx_ge_<mode>_p"
(match_dup 2)))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcmpge<VSs>. %x0,%x1,%x2"
- [(set_attr "type" "<VStype_simple>")])
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "power9_alu2" "yes")])
;; Vector select
(define_insn "*vsx_xxsel<mode>"
@@ -1667,7 +1675,8 @@ (define_insn "vsx_xscvspdpn"
UNSPEC_VSX_CVSPDPN))]
"TARGET_XSCVSPDPN"
"xscvspdpn %x0,%x1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "fp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "vsx_xscvdpspn_scalar"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
@@ -1684,7 +1693,8 @@ (define_insn "vsx_xscvspdpn_directmove"
UNSPEC_VSX_CVSPDPN))]
"TARGET_XSCVSPDPN"
"xscvspdpn %x0,%x1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "fp")
+ (set_attr "power9_alu2" "yes")])
;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
===================================================================
@@ -511,7 +511,8 @@ (define_insn "altivec_vaddu<VI_char>s"
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"<VI_unit>"
"vaddu<VI_char>s %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vadds<VI_char>s"
[(set (match_operand:VI 0 "register_operand" "=v")
@@ -521,7 +522,8 @@ (define_insn "altivec_vadds<VI_char>s"
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vadds<VI_char>s %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
;; sub
(define_insn "sub<mode>3"
@@ -557,7 +559,8 @@ (define_insn "altivec_vsubu<VI_char>s"
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vsubu<VI_char>s %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vsubs<VI_char>s"
[(set (match_operand:VI 0 "register_operand" "=v")
@@ -567,7 +570,8 @@ (define_insn "altivec_vsubs<VI_char>s"
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vsubs<VI_char>s %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
;;
(define_insn "altivec_vavgu<VI_char>"
@@ -577,7 +581,8 @@ (define_insn "altivec_vavgu<VI_char>"
UNSPEC_VAVGU))]
"TARGET_ALTIVEC"
"vavgu<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vavgs<VI_char>"
[(set (match_operand:VI 0 "register_operand" "=v")
@@ -586,7 +591,8 @@ (define_insn "altivec_vavgs<VI_char>"
UNSPEC_VAVGS))]
"VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vavgs<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vcmpbfp"
[(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -595,7 +601,8 @@ (define_insn "altivec_vcmpbfp"
UNSPEC_VCMPBFP))]
"VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vcmpbfp %0,%1,%2"
- [(set_attr "type" "veccmp")])
+ [(set_attr "type" "veccmp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_eq<mode>"
[(set (match_operand:VI2 0 "altivec_register_operand" "=v")
@@ -627,7 +634,8 @@ (define_insn "*altivec_eqv4sf"
(match_operand:V4SF 2 "altivec_register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"vcmpeqfp %0,%1,%2"
- [(set_attr "type" "veccmp")])
+ [(set_attr "type" "veccmp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_gtv4sf"
[(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
@@ -635,7 +643,8 @@ (define_insn "*altivec_gtv4sf"
(match_operand:V4SF 2 "altivec_register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"vcmpgtfp %0,%1,%2"
- [(set_attr "type" "veccmp")])
+ [(set_attr "type" "veccmp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_gev4sf"
[(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
@@ -643,7 +652,8 @@ (define_insn "*altivec_gev4sf"
(match_operand:V4SF 2 "altivec_register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"vcmpgefp %0,%1,%2"
- [(set_attr "type" "veccmp")])
+ [(set_attr "type" "veccmp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_vsel<mode>"
[(set (match_operand:VM 0 "altivec_register_operand" "=v")
@@ -854,7 +864,8 @@ (define_insn "umax<mode>3"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vmaxu<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "smax<mode>3"
[(set (match_operand:VI2 0 "register_operand" "=v")
@@ -862,7 +873,8 @@ (define_insn "smax<mode>3"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vmaxs<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_smaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=v")
@@ -870,7 +882,8 @@ (define_insn "*altivec_smaxv4sf3"
(match_operand:V4SF 2 "register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"vmaxfp %0,%1,%2"
- [(set_attr "type" "veccmp")])
+ [(set_attr "type" "veccmp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "umin<mode>3"
[(set (match_operand:VI2 0 "register_operand" "=v")
@@ -878,7 +891,8 @@ (define_insn "umin<mode>3"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vminu<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "smin<mode>3"
[(set (match_operand:VI2 0 "register_operand" "=v")
@@ -886,7 +900,8 @@ (define_insn "smin<mode>3"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vmins<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_sminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=v")
@@ -894,7 +909,8 @@ (define_insn "*altivec_sminv4sf3"
(match_operand:V4SF 2 "register_operand" "v")))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
"vminfp %0,%1,%2"
- [(set_attr "type" "veccmp")])
+ [(set_attr "type" "veccmp")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vmhaddshs"
[(set (match_operand:V8HI 0 "register_operand" "=v")
@@ -1614,7 +1630,8 @@ (define_insn "*altivec_vrl<VI_char>"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vrl<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vsl"
[(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -1658,7 +1675,8 @@ (define_insn "*altivec_vsl<VI_char>"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vsl<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_vsr<VI_char>"
[(set (match_operand:VI2 0 "register_operand" "=v")
@@ -1666,7 +1684,8 @@ (define_insn "*altivec_vsr<VI_char>"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vsr<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*altivec_vsra<VI_char>"
[(set (match_operand:VI2 0 "register_operand" "=v")
@@ -1674,7 +1693,8 @@ (define_insn "*altivec_vsra<VI_char>"
(match_operand:VI2 2 "register_operand" "v")))]
"<VI_unit>"
"vsra<VI_char> %0,%1,%2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
(define_insn "altivec_vsr"
[(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -3463,6 +3483,7 @@ (define_insn "*p8v_clz<mode>2"
"TARGET_P8_VECTOR"
"vclz<wd> %0,%1"
[(set_attr "length" "4")
+ (set_attr "power9_alu2" "yes")
(set_attr "type" "vecsimple")])
;; Vector absolute difference unsigned
@@ -3490,6 +3511,7 @@ (define_insn "*p9v_ctz<mode>2"
"TARGET_P9_VECTOR"
"vctz<wd> %0,%1"
[(set_attr "length" "4")
+ (set_attr "power9_alu2" "yes")
(set_attr "type" "vecsimple")])
;; Vector population count
@@ -3499,6 +3521,7 @@ (define_insn "*p8v_popcount<mode>2"
"TARGET_P8_VECTOR"
"vpopcnt<wd> %0,%1"
[(set_attr "length" "4")
+ (set_attr "power9_alu2" "yes")
(set_attr "type" "vecsimple")])
;; Vector parity
@@ -3508,6 +3531,7 @@ (define_insn "*p9v_parity<mode>2"
"TARGET_P9_VECTOR"
"vprtyb<wd> %0,%1"
[(set_attr "length" "4")
+ (set_attr "power9_alu2" "yes")
(set_attr "type" "vecsimple")])
;; Vector Gather Bits by Bytes by Doubleword
===================================================================
@@ -58,7 +58,7 @@ (define_insn "extendsddd2"
(float_extend:DD (match_operand:SD 1 "gpc_reg_operand" "f")))]
"TARGET_DFP"
"dctdp %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_expand "extendsdtd2"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
@@ -76,7 +76,7 @@ (define_insn "truncddsd2"
(float_truncate:SD (match_operand:DD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"drsp %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_expand "negdd2"
[(set (match_operand:DD 0 "gpc_reg_operand" "")
@@ -160,7 +160,7 @@ (define_insn "extendddtd2"
(float_extend:TD (match_operand:DD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dctqpq %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
;; The result of drdpq is an even/odd register pair with the converted
;; value in the even register and zero in the odd register.
@@ -173,7 +173,7 @@ (define_insn "trunctddd2"
(clobber (match_scratch:TD 2 "=d"))]
"TARGET_DFP"
"drdpq %2,%1\;fmr %0,%2"
- [(set_attr "type" "fp")
+ [(set_attr "type" "dfp")
(set_attr "length" "8")])
(define_insn "adddd3"
@@ -182,7 +182,7 @@ (define_insn "adddd3"
(match_operand:DD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dadd %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "addtd3"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
@@ -190,7 +190,7 @@ (define_insn "addtd3"
(match_operand:TD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"daddq %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "subdd3"
[(set (match_operand:DD 0 "gpc_reg_operand" "=d")
@@ -198,7 +198,7 @@ (define_insn "subdd3"
(match_operand:DD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dsub %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "subtd3"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
@@ -206,7 +206,7 @@ (define_insn "subtd3"
(match_operand:TD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dsubq %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "muldd3"
[(set (match_operand:DD 0 "gpc_reg_operand" "=d")
@@ -214,7 +214,7 @@ (define_insn "muldd3"
(match_operand:DD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dmul %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "multd3"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
@@ -222,7 +222,7 @@ (define_insn "multd3"
(match_operand:TD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dmulq %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "divdd3"
[(set (match_operand:DD 0 "gpc_reg_operand" "=d")
@@ -230,7 +230,7 @@ (define_insn "divdd3"
(match_operand:DD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"ddiv %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "divtd3"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
@@ -238,7 +238,7 @@ (define_insn "divtd3"
(match_operand:TD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"ddivq %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "*cmpdd_internal1"
[(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
@@ -246,7 +246,7 @@ (define_insn "*cmpdd_internal1"
(match_operand:DD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dcmpu %0,%1,%2"
- [(set_attr "type" "fpcompare")])
+ [(set_attr "type" "dfp")])
(define_insn "*cmptd_internal1"
[(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
@@ -254,21 +254,21 @@ (define_insn "*cmptd_internal1"
(match_operand:TD 2 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dcmpuq %0,%1,%2"
- [(set_attr "type" "fpcompare")])
+ [(set_attr "type" "dfp")])
(define_insn "floatdidd2"
[(set (match_operand:DD 0 "gpc_reg_operand" "=d")
(float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))]
"TARGET_DFP && TARGET_POPCNTD"
"dcffix %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "floatditd2"
[(set (match_operand:TD 0 "gpc_reg_operand" "=d")
(float:TD (match_operand:DI 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dcffixq %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
;; Convert a decimal64 to a decimal64 whose value is an integer.
;; This is the first stage of converting it to an integer type.
@@ -278,7 +278,7 @@ (define_insn "ftruncdd2"
(fix:DD (match_operand:DD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"drintn. 0,%0,%1,1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
;; Convert a decimal64 whose value is an integer to an actual integer.
;; This is the second stage of converting decimal float to integer type.
@@ -288,7 +288,7 @@ (define_insn "fixdddi2"
(fix:DI (match_operand:DD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dctfix %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
;; Convert a decimal128 to a decimal128 whose value is an integer.
;; This is the first stage of converting it to an integer type.
@@ -298,7 +298,7 @@ (define_insn "ftrunctd2"
(fix:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"drintnq. 0,%0,%1,1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
;; Convert a decimal128 whose value is an integer to an actual integer.
;; This is the second stage of converting decimal float to integer type.
@@ -308,7 +308,7 @@ (define_insn "fixtddi2"
(fix:DI (match_operand:TD 1 "gpc_reg_operand" "d")))]
"TARGET_DFP"
"dctfixq %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
;; Decimal builtin support
@@ -333,7 +333,7 @@ (define_insn "dfp_ddedpd_<mode>"
UNSPEC_DDEDPD))]
"TARGET_DFP"
"ddedpd<dfp_suffix> %1,%0,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "dfp_denbcd_<mode>"
[(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
@@ -342,7 +342,7 @@ (define_insn "dfp_denbcd_<mode>"
UNSPEC_DENBCD))]
"TARGET_DFP"
"denbcd<dfp_suffix> %1,%0,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "dfp_dxex_<mode>"
[(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
@@ -350,7 +350,7 @@ (define_insn "dfp_dxex_<mode>"
UNSPEC_DXEX))]
"TARGET_DFP"
"dxex<dfp_suffix> %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "dfp_diex_<mode>"
[(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
@@ -359,7 +359,7 @@ (define_insn "dfp_diex_<mode>"
UNSPEC_DXEX))]
"TARGET_DFP"
"diex<dfp_suffix> %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "dfp_dscli_<mode>"
[(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
@@ -368,7 +368,7 @@ (define_insn "dfp_dscli_<mode>"
UNSPEC_DSCLI))]
"TARGET_DFP"
"dscli<dfp_suffix> %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
(define_insn "dfp_dscri_<mode>"
[(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
@@ -377,4 +377,4 @@ (define_insn "dfp_dscri_<mode>"
UNSPEC_DSCRI))]
"TARGET_DFP"
"dscri<dfp_suffix> %0,%1,%2"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "dfp")])
===================================================================
@@ -107,4 +107,5 @@ (define_insn "crypto_vshasigma<CR_char>"
UNSPEC_VSHASIGMA))]
"TARGET_CRYPTO"
"vshasigma<CR_char> %0,%1,%2,%3"
- [(set_attr "type" "crypto")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "power9_alu2" "yes")])
===================================================================
@@ -183,7 +183,7 @@ (define_attr "type"
brinc,
vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,
vecfloat,vecfdiv,vecdouble,mffgpr,mftgpr,crypto,
- htm"
+ htm,htmsimple,dfp"
(const_string "integer"))
;; What data size does this instruction work on?
@@ -275,6 +275,13 @@ (define_attr "cell_micro" "not,condition
(const_string "always")
(const_string "not")))
+;; Is this instruction a Power9 ALU2 insn?
+(define_attr "power9_alu2" "no,yes" (const_string "no"))
+
+;; Define attribute for insn mnemonic
+(define_attr "mnemonic" "unknown" (const_string "unknown"))
+
+
(automata_option "ndfa")
(include "rs64.md")
@@ -298,6 +305,7 @@ (define_attr "cell_micro" "not,condition
(include "power6.md")
(include "power7.md")
(include "power8.md")
+(include "power9.md")
(include "cell.md")
(include "xfpu.md")
(include "a2.md")
@@ -4510,7 +4518,8 @@ (define_insn "*cmp<mode>_fpr"
"@
fcmpu %0,%1,%2
xscmpudp %0,%x1,%x2"
- [(set_attr "type" "fpcompare")])
+ [(set_attr "type" "fpcompare")
+ (set_attr "power9_alu2" "yes")])
;; Floating point conversions
(define_expand "extendsfdf2"
@@ -4830,7 +4839,8 @@ (define_insn "*fpmask<mode>"
(match_operand:V2DI 5 "zero_constant" "")))]
"TARGET_P9_MINMAX"
"xscmp%V1dp %x0,%x2,%x3"
- [(set_attr "type" "fpcompare")])
+ [(set_attr "type" "fpcompare")
+ (set_attr "power9_alu2" "yes")])
(define_insn "*xxsel<mode>"
[(set (match_operand:SFDF 0 "vsx_register_operand" "=<Fv>")
@@ -13658,7 +13668,7 @@ (define_insn "*cmp<mode>_hw"
(match_operand:IEEE128 2 "altivec_register_operand" "v")))]
"TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
"xscmpuqp %0,%1,%2"
- [(set_attr "type" "fpcompare")])
+ [(set_attr "type" "veccmp")])