===================================================================
@@ -3067,16 +3067,29 @@ (define_expand "vsx_mergeh_<mode>"
})
;; V2DF/V2DI splat
-(define_insn "vsx_splat_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>,<VSa>,we")
+(define_insn_and_split "vsx_splat_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand"
+ "=<VSa>, <VSa>,we,<VS_64dm>")
(vec_duplicate:VSX_D
- (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,Z,b")))]
+ (match_operand:<VS_scalar> 1 "splat_input_operand"
+ "<VS_64reg>,Z, b, wr")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
"@
xxpermdi %x0,%x1,%x1,0
lxvdsx %x0,%y1
- mtvsrdd %x0,%1,%1"
- [(set_attr "type" "vecperm,vecload,vecperm")])
+ mtvsrdd %x0,%1,%1
+ #"
+ "&& reload_completed && TARGET_POWERPC64 && !TARGET_P9_VECTOR
+ && int_reg_operand (operands[1], <VS_scalar>mode)"
+ [(set (match_dup 2)
+ (match_dup 1))
+ (set (match_dup 0)
+ (vec_duplicate:VSX_D (match_dup 2)))]
+{
+ operands[2] = gen_rtx_REG (<VS_scalar>mode, reg_or_subregno (operands[0]));
+}
+ [(set_attr "type" "vecperm,vecload,vecperm,vecperm")
+ (set_attr "length" "4,4,4,8")])
;; V4SI splat support
(define_insn "vsx_splat_v4si"
===================================================================
@@ -0,0 +1,60 @@
+// { dg-do compile { target { powerpc64*-*-* && lp64 } } }
+// { dg-require-effective-target powerpc_p8vector_ok } */
+// { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } }
+// { dg-options "-mcpu=power8 -O3 -fstack-protector -mno-lra" }
+
+// PAR target/71294 failed because RELOAD could not figure how create a V2DI
+// vector that auto vectorization created with each element being the same
+// stack address, with stack-protector turned on.
+
+class A;
+template <typename _Tp, int m, int n> class B {
+public:
+ _Tp val[m * n];
+};
+class C {
+public:
+ C(A);
+};
+struct D {
+ D();
+ unsigned long &operator[](int);
+ unsigned long *p;
+};
+class A {
+public:
+ template <typename _Tp, int m, int n> A(const B<_Tp, m, n> &, bool);
+ int rows, cols;
+ unsigned char *data;
+ unsigned char *datastart;
+ unsigned char *dataend;
+ unsigned char *datalimit;
+ D step;
+};
+template <typename _Tp, int m, int n>
+A::A(const B<_Tp, m, n> &p1, bool)
+ : rows(m), cols(n) {
+ step[0] = cols * sizeof(_Tp);
+ datastart = data = (unsigned char *)p1.val;
+ datalimit = dataend = datastart + rows * step[0];
+}
+class F {
+public:
+ static void compute(C);
+ template <typename _Tp, int m, int n, int nm>
+ static void compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+ B<_Tp, n, nm> &);
+};
+D::D() {}
+unsigned long &D::operator[](int p1) { return p[p1]; }
+template <typename _Tp, int m, int n, int nm>
+void F::compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+ B<_Tp, n, nm> &p4) {
+ A a(p4, false);
+ compute(a);
+}
+void fn1() {
+ B<double, 4, 4> b, c, e;
+ B<double, 4, 1> d;
+ F::compute(b, d, c, e);
+}