new file mode 100644
@@ -0,0 +1,128 @@
+;; Vector Quad, Vector Pair, and MMA patterns.
+;; Copyright (C) 2020 Free Software Foundation, Inc.
+;; Contributed by Peter Bergner <bergner@linux.ibm.com> and
+;; Michael Meissner <meissner@linux.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Vector load/store pair operations
+;; We need to define an OImode move pattern, even though we don't enable it,
+;; because the machine-independent parts of the compiler at times use the
+;; large integer modes.
+;;
+;; If we enable movoi, the compiler will try to use it.  Unfortunately, if it
+;; is enabled, it will cause problems on little endian systems with code that
+;; uses the vector_size attribute, due to endian issues.
+(define_expand "movoi"
+ [(set (match_operand:OI 0 "nonimmediate_operand")
+ (match_operand:OI 1 "input_operand"))]
+ "0"
+{
+ gcc_unreachable ();
+})
+
+;; Vector pair support. POImode is only defined for vector registers.
+(define_expand "movpoi"
+ [(set (match_operand:POI 0 "nonimmediate_operand")
+ (match_operand:POI 1 "input_operand"))]
+ "TARGET_MMA"
+{
+ rs6000_emit_move (operands[0], operands[1], POImode);
+ DONE;
+})
+
+(define_insn_and_split "*movpoi"
+ [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
+ (match_operand:POI 1 "input_operand" "m,wa,wa"))]
+ "TARGET_MMA
+ && (gpc_reg_operand (operands[0], POImode)
+ || gpc_reg_operand (operands[1], POImode))"
+ "@
+ lxvp%X1 %x0,%1
+ stxvp%X0 %x1,%0
+ #"
+ "&& reload_completed
+ && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+ [(const_int 0)]
+{
+ rs6000_split_multireg_move (operands[0], operands[1]);
+ DONE;
+}
+ [(set_attr "type" "vecload,vecstore,veclogical")
+ (set_attr "length" "*,*,8")])
+
+;; Special pattern to prevent DSE from generating an internal error if it
+;; notices a structure copy that it wants to eliminate. This generates pretty
+;; bad code, but at least it doesn't die.
+(define_insn_and_split "truncpoidi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI (match_operand:POI 1 "gpc_reg_operand" "wa")))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (vec_select:DI (match_dup 2)
+ (parallel [(match_dup 3)])))]
+{
+ unsigned r = reg_or_subregno (operands[1]) + !BYTES_BIG_ENDIAN;
+ operands[2] = gen_rtx_REG (V2DImode, r);
+ operands[3] = BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx;
+})
+
+
+;; Vector quad load/store operations
+;; We need to define an XImode move pattern, even though we don't enable it,
+;; because the machine-independent parts of the compiler at times use the
+;; large integer modes.
+;;
+;; If we enable movxi, the compiler will try to use it.  Unfortunately, if it
+;; is enabled, it will cause problems on little endian systems with code that
+;; uses the vector_size attribute, due to endian issues.
+(define_expand "movxi"
+ [(set (match_operand:XI 0 "nonimmediate_operand")
+ (match_operand:XI 1 "input_operand"))]
+ "0"
+{
+ gcc_unreachable ();
+})
+
+;; Vector quad support. PXImode is only defined for floating point registers.
+(define_expand "movpxi"
+ [(set (match_operand:PXI 0 "nonimmediate_operand")
+ (match_operand:PXI 1 "input_operand"))]
+ "TARGET_MMA"
+{
+ rs6000_emit_move (operands[0], operands[1], PXImode);
+ DONE;
+})
+
+(define_insn_and_split "*movpxi"
+ [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d")
+ (match_operand:PXI 1 "input_operand" "m,d,d"))]
+ "TARGET_MMA
+ && (gpc_reg_operand (operands[0], PXImode)
+ || gpc_reg_operand (operands[1], PXImode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_multireg_move (operands[0], operands[1]);
+ DONE;
+}
+ [(set_attr "type" "vecload,vecstore,veclogical")
+ (set_attr "length" "8,8,16")
+ (set_attr "max_prefixed_insns" "2,2,*")])
@@ -593,6 +593,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
PROCESSOR_CELL) (e.g. -mcpu=cell). */
if ((bu_mask & RS6000_BTM_CELL) != 0)
rs6000_define_or_undefine_macro (define_p, "__PPU__");
+
+ /* Tell the user if we support the MMA instructions. */
+ if ((flags & OPTION_MASK_MMA) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__MMA__");
}
void
@@ -12205,6 +12205,24 @@ rs6000_init_builtins (void)
else
ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
+ /* Vector pair and vector quad support. */
+ if (TARGET_MMA)
+ {
+ tree oi_uns_type = make_unsigned_type (256);
+ vector_pair_type_node = build_distinct_type_copy (oi_uns_type);
+ SET_TYPE_MODE (vector_pair_type_node, POImode);
+ layout_type (vector_pair_type_node);
+ lang_hooks.types.register_builtin_type (vector_pair_type_node,
+ "__vector_pair");
+
+ tree xi_uns_type = make_unsigned_type (512);
+ vector_quad_type_node = build_distinct_type_copy (xi_uns_type);
+ SET_TYPE_MODE (vector_quad_type_node, PXImode);
+ layout_type (vector_quad_type_node);
+ lang_hooks.types.register_builtin_type (vector_quad_type_node,
+ "__vector_quad");
+ }
+
/* Initialize the modes for builtin_function_type, mapping a machine mode to
tree type node. */
builtin_mode_to_type[QImode][0] = integer_type_node;
@@ -12236,6 +12254,8 @@ rs6000_init_builtins (void)
builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
+ builtin_mode_to_type[POImode][1] = vector_pair_type_node;
+ builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
tdecl = add_builtin_type ("__bool char", bool_char_type_node);
TYPE_NAME (bool_char_type_node) = tdecl;
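Because the new types are built as distinct copies of 256-bit and 512-bit
unsigned integer types, their sizes follow directly (a sketch, assuming
-mcpu=future -mmma):

    _Static_assert (sizeof (__vector_pair) == 32, "256-bit vector pair");
    _Static_assert (sizeof (__vector_quad) == 64, "512-bit vector quad");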
@@ -76,7 +76,8 @@
| OPTION_MASK_P9_VECTOR)
/* Flags that need to be turned off if -mno-future. */
-#define OTHER_FUTURE_MASKS (OPTION_MASK_PCREL \
+#define OTHER_FUTURE_MASKS (OPTION_MASK_MMA \
+ | OPTION_MASK_PCREL \
| OPTION_MASK_PREFIXED)
/* Support for a future processor's features. */
@@ -132,6 +133,7 @@
| OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
| OPTION_MASK_MFCRF \
+ | OPTION_MASK_MMA \
| OPTION_MASK_MODULO \
| OPTION_MASK_MULHW \
| OPTION_MASK_NO_UPDATE \
@@ -82,3 +82,13 @@ VECTOR_MODE (INT, SI, 2); /* V2SI */
for quad memory atomic operations to force getting an even/odd register
combination. */
PARTIAL_INT_MODE (TI, 128, PTI);
+
+/* Define, but don't use, the larger integer modes.  We need integer modes
+   defined that are the same size as the vector pair and vector quad modes. */
+
+INT_MODE (OI, 32);
+INT_MODE (XI, 64);
+
+/* Modes used by __vector_pair and __vector_quad. */
+PARTIAL_INT_MODE (OI, 256, POI); /* __vector_pair. */
+PARTIAL_INT_MODE (XI, 512, PXI); /* __vector_quad. */
@@ -1745,6 +1745,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
rs6000_cannot_substitute_mem_equiv_p
+
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
/* Processor table. */
@@ -1798,7 +1801,7 @@ rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
128-bit floating point that can go in vector registers, which has VSX
memory addressing. */
if (FP_REGNO_P (regno))
- reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
+ reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
? UNITS_PER_VSX_WORD
: UNITS_PER_FP_WORD);
@@ -1821,6 +1824,20 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
if (COMPLEX_MODE_P (mode))
mode = GET_MODE_INNER (mode);
+ /* Vector pair modes need even/odd VSX register pairs. Only allow vector
+ registers. We need to allow OImode to have the same registers as POImode,
+ even though we do not enable the move pattern for OImode. */
+ if (mode == POImode || mode == OImode)
+ return (TARGET_MMA && VSX_REGNO_P (regno)
+ && (regno & 1) == 0);
+
+ /* MMA accumulator modes need FPRs whose register number is divisible by 4.
+ We need to allow XImode to have the same registers as PXImode, even though
+ we do not enable the move pattern for XImode. */
+ if (mode == PXImode || mode == XImode)
+ return (TARGET_MMA && FP_REGNO_P (regno)
+ && (regno & 3) == 0);
+
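+ /* Illustration: FPRs are registers 32-63 in GCC's internal numbering, so
+ the (regno & 3) == 0 test above accepts only regno 32, 36, ..., 60,
+ giving eight possible accumulator positions (an inference from this
+ check, not a statement elsewhere in the patch). */
+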
/* PTImode can only go in GPRs. Quad word memory operations require even/odd
register combinations, and use PTImode where we need to deal with quad
word memory operations. Don't allow quad words in the argument or frame
@@ -1836,7 +1853,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
asked for it. */
if (TARGET_VSX && VSX_REGNO_P (regno)
&& (VECTOR_MEM_VSX_P (mode)
- || FLOAT128_VECTOR_P (mode)
+ || VECTOR_ALIGNMENT_P (mode)
|| reg_addr[mode].scalar_in_vmx_p
|| mode == TImode
|| (TARGET_VADDUQM && mode == V1TImode)))
@@ -1846,7 +1863,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
if (ALTIVEC_REGNO_P (regno))
{
- if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
+ if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
return 0;
return ALTIVEC_REGNO_P (last_regno);
@@ -1862,7 +1879,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
modes and DImode. */
if (FP_REGNO_P (regno))
{
- if (FLOAT128_VECTOR_P (mode))
+ if (VECTOR_ALIGNMENT_P (mode))
return false;
if (SCALAR_FLOAT_MODE_P (mode)
@@ -1925,15 +1942,19 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
GPR registers, and TImode can go in any GPR as well as VSX registers (PR
57744).
+ Similarly, don't allow POImode (vector pair, restricted to even VSX
+ registers) or PXImode (vector quad, restricted to FPR registers divisible
+ by 4) to tie with other modes.
+
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
128-bit floating point on VSX systems ties with other vectors. */
static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
- if (mode1 == PTImode)
- return mode2 == PTImode;
- if (mode2 == PTImode)
+ if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode)
+ return mode1 == mode2;
+ if (mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
return false;
if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2206,6 +2227,8 @@ rs6000_debug_reg_global (void)
SDmode,
DDmode,
TDmode,
+ V2SImode,
+ V2SFmode,
V16QImode,
V8HImode,
V4SImode,
@@ -2220,9 +2243,14 @@ rs6000_debug_reg_global (void)
V2DFmode,
V8SFmode,
V4DFmode,
+ OImode,
+ XImode,
+ POImode,
+ PXImode,
CCmode,
CCUNSmode,
CCEQmode,
+ CCFPmode,
};
/* Virtual regs we are interested in. */
@@ -2619,7 +2647,7 @@ rs6000_setup_reg_addr_masks (void)
&& (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
&& msize <= 8
&& !VECTOR_MODE_P (m2)
- && !FLOAT128_VECTOR_P (m2)
+ && !VECTOR_ALIGNMENT_P (m2)
&& !complex_p
&& (m != E_DFmode || !TARGET_VSX)
&& (m != E_SFmode || !TARGET_P8_VECTOR)
@@ -2675,6 +2703,22 @@ rs6000_setup_reg_addr_masks (void)
addr_mask |= RELOAD_REG_QUAD_OFFSET;
}
+ /* Vector pairs can do both indexed and offset loads if the
+ instructions are enabled, otherwise they can only do offset loads,
+ since the move will be split into two vector moves. Vector quads
+ can only do offset loads. */
+ else if ((addr_mask != 0) && TARGET_MMA
+ && (m2 == POImode || m2 == PXImode))
+ {
+ addr_mask |= RELOAD_REG_OFFSET;
+ if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
+ {
+ addr_mask |= RELOAD_REG_QUAD_OFFSET;
+ if (m2 == POImode)
+ addr_mask |= RELOAD_REG_INDEXED;
+ }
+ }
+
/* VMX registers can do (REG & -16) and ((REG+REG) & -16)
addressing on 128-bit types. */
if (rc == RELOAD_REG_VMX && msize == 16
@@ -2876,6 +2920,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_align[TImode] = align64;
}
+ /* Add support for vector pairs and vector quad registers. */
+ if (TARGET_MMA)
+ {
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ if (m == POImode || m == PXImode)
+ {
+ rs6000_vector_unit[m] = VECTOR_NONE;
+ rs6000_vector_mem[m] = VECTOR_VSX;
+ rs6000_vector_align[m] = (m == POImode) ? 256 : 512;
+ }
+ }
+
/* Register class constraints for the constraints that depend on compile
switches. When the VSX code was added, different constraints were added
based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
@@ -3007,6 +3063,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
}
+
+ if (TARGET_MMA)
+ {
+ reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
+ reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
+ reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
+ reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+ }
}
}
else
@@ -3339,7 +3403,8 @@ rs6000_builtin_mask_calculate (void)
&& !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
| ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
| ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
- | ((TARGET_FUTURE) ? RS6000_BTM_FUTURE : 0));
+ | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
+ | ((TARGET_FUTURE) ? RS6000_BTM_FUTURE : 0));
}
/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
@@ -4202,6 +4267,15 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_PCREL;
}
+ /* Turn off vector pair/mma options on non-future systems. */
+ if (!TARGET_FUTURE && TARGET_MMA)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
+ error ("%qs requires %qs", "-mmma", "-mcpu=future");
+
+ rs6000_isa_flags &= ~OPTION_MASK_MMA;
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
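The user-visible effect of the check above is roughly the following (a
sketch; exact diagnostic formatting may differ):

    $ gcc -mcpu=power9 -mmma test.c
    error: '-mmma' requires '-mcpu=future'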
@@ -7175,7 +7249,7 @@ rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
return (STRICT_ALIGNMENT
|| (!TARGET_EFFICIENT_UNALIGNED_VSX
&& ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
- || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
+ || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
&& (int) align < VECTOR_ALIGN (mode)))));
}
@@ -7360,7 +7434,7 @@ quad_address_p (rtx addr, machine_mode mode, bool strict)
{
rtx op0, op1;
- if (GET_MODE_SIZE (mode) != 16)
+ if (GET_MODE_SIZE (mode) < 16)
return false;
if (legitimate_indirect_address_p (addr, strict))
@@ -7678,6 +7752,12 @@ reg_offset_addressing_ok_p (machine_mode mode)
return mode_supports_dq_form (mode);
break;
+ /* The vector pair/quad types support offset addressing if the
+ underlying vectors support offset addressing. */
+ case E_POImode:
+ case E_PXImode:
+ return TARGET_MMA;
+
case E_SDmode:
/* If we can do direct load/stores of SDmode, restrict it to reg+reg
addressing for the LFIWZX and STFIWX instructions. */
@@ -8024,8 +8104,14 @@ legitimate_indexed_address_p (rtx x, int strict)
bool
avoiding_indexed_address_p (machine_mode mode)
{
- /* Avoid indexed addressing for modes that have non-indexed
- load/store instruction forms. */
+ unsigned int msize = GET_MODE_SIZE (mode);
+
+ /* Avoid indexed addressing for modes that have non-indexed load/store
+ instruction forms. On the future system, vector pairs have an indexed
+ form, but vector quads don't. */
+ if (msize > 16)
+ return msize != 32;
+
return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
@@ -9856,6 +9942,13 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
operands[1] = force_const_mem (mode, operands[1]);
break;
+ case E_POImode:
+ case E_PXImode:
+ if (CONSTANT_P (operands[1]))
+ error ("%qs is an opaque type, and you can't set it to other values.",
+ (mode == POImode) ? "__vector_pair" : "__vector_quad");
+ break;
+
case E_SImode:
case E_DImode:
/* Use default pattern for address of ELF small data */
@@ -12117,8 +12210,20 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
return NO_REGS;
}
- if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
- return GENERAL_REGS;
+ /* For the vector pair and vector quad modes, prefer their natural register
+ (VSX or FPR) rather than GPR registers. For other integer types, prefer
+ the GPR registers. */
+ if (rclass == GEN_OR_FLOAT_REGS)
+ {
+ if (mode == POImode)
+ return VSX_REGS;
+
+ if (mode == PXImode)
+ return FLOAT_REGS;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ return GENERAL_REGS;
+ }
return rclass;
}
@@ -15793,7 +15898,23 @@ rs6000_split_multireg_move (rtx dst, rtx src)
reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
mode = GET_MODE (dst);
nregs = hard_regno_nregs (reg, mode);
- if (FP_REGNO_P (reg))
+ /* If we have a quad vector register for MMA, and this is a load or store,
+ see if we can use vector paired load/stores. */
+ if (mode == PXImode && TARGET_MMA
+ && (MEM_P (dst) || MEM_P (src)))
+ {
+ reg_mode = POImode;
+ nregs /= hard_regno_nregs (reg, reg_mode);
+ }
+
+ /* If we have a vector pair/quad mode, split it into two/four separate
+ vectors. */
+ else if (mode == POImode || mode == PXImode)
+ {
+ reg_mode = V1TImode;
+ nregs /= hard_regno_nregs (reg, reg_mode);
+ }
+ else if (FP_REGNO_P (reg))
reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
(TARGET_HARD_FLOAT ? DFmode : SFmode);
else if (ALTIVEC_REGNO_P (reg))
@@ -15837,6 +15958,48 @@ rs6000_split_multireg_move (rtx dst, rtx src)
return;
}
+ /* For __vector_pair and __vector_quad modes we have to load or store the
+ registers so that things are properly swapped in little endian mode.
+ This means the last register gets the first memory location. */
+ if (!WORDS_BIG_ENDIAN && (mode == POImode || mode == PXImode))
+ {
+ if (MEM_P (dst))
+ {
+ unsigned offset = 0;
+ unsigned size = GET_MODE_SIZE (reg_mode);
+
+ for (int i = nregs - 1; i >= 0; i--)
+ {
+ rtx dst2 = adjust_address (dst, reg_mode, offset);
+ rtx src2 = simplify_gen_subreg (reg_mode, src, mode, i * size);
+ offset += size;
+
+ emit_insn (gen_rtx_SET (dst2, src2));
+ }
+
+ return;
+ }
+
+ if (MEM_P (src))
+ {
+ unsigned offset = 0;
+ unsigned size = GET_MODE_SIZE (reg_mode);
+
+ for (int i = nregs - 1; i >= 0; i--)
+ {
+ rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, i * size);
+ rtx src2 = adjust_address (src, reg_mode, offset);
+ offset += size;
+
+ emit_insn (gen_rtx_SET (dst2, src2));
+ }
+
+ return;
+ }
+
+ /* Register -> register moves can use common code. */
+ }
+
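+ /* Worked illustration with hypothetical registers: storing a __vector_quad
+ held in vs4-vs7 on little endian writes the vs6:vs7 pair to bytes 0-31
+ of the destination and vs4:vs5 to bytes 32-63, so the last registers
+ land at the first memory location as described above. */
+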
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
/* Move register range backwards, if we might have destructive
@@ -19227,7 +19390,8 @@ rs6000_handle_altivec_attribute (tree *node,
/* AltiVec defines five built-in scalar types that serve as vector
elements; we must teach the compiler how to mangle them. The 128-bit
- floating point mangling is target-specific as well. */
+ floating point mangling is target-specific as well. MMA defines
+ two built-in types to be used as opaque vector types. */
static const char *
rs6000_mangle_type (const_tree type)
@@ -19249,6 +19413,9 @@ rs6000_mangle_type (const_tree type)
if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
+ if (type == vector_pair_type_node)
+ return "u13__vector_pair";
+ if (type == vector_quad_type_node)
+ return "u13__vector_quad";
+
/* For all other types, use the default mangling. */
return NULL;
}
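For reference, the mangling these return values produce under the Itanium
ABI's vendor-extended-type encoding would be, e.g. (a sketch, not tested
output):

    void f (__vector_pair *);	// _Z1fPu13__vector_pair
    void g (__vector_quad *);	// _Z1gPu13__vector_quad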
@@ -22506,7 +22673,7 @@ rs6000_function_value (const_tree valtype,
/* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
return register is used in both cases, and we won't see V2DImode/V2DFmode
for pure altivec, combine the two cases. */
- else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
+ else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
&& TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode))
regno = ALTIVEC_ARG_RETURN;
@@ -22922,6 +23089,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "isel", OPTION_MASK_ISEL, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", 0, false, true },
+ { "mma", OPTION_MASK_MMA, false, true },
{ "modulo", OPTION_MASK_MODULO, false, true },
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
@@ -22992,6 +23160,8 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{ "powerpc64", RS6000_BTM_POWERPC64, false, false },
{ "float128", RS6000_BTM_FLOAT128, false, false },
{ "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
+ { "mma", RS6000_BTM_MMA, false, false },
+ { "future", RS6000_BTM_FUTURE, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
@@ -24947,7 +25117,7 @@ address_to_insn_form (rtx addr,
non_prefixed_format = NON_PREFIXED_DS;
else if (TARGET_VSX && size >= 16
- && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
+ && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
non_prefixed_format = NON_PREFIXED_DQ;
else
@@ -25076,7 +25246,7 @@ reg_to_non_prefixed (rtx reg, machine_mode mode)
else if (TARGET_VSX && size >= 16
&& (VECTOR_MODE_P (mode)
- || FLOAT128_VECTOR_P (mode)
+ || VECTOR_ALIGNMENT_P (mode)
|| mode == TImode || mode == CTImode))
return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
@@ -25100,7 +25270,7 @@ reg_to_non_prefixed (rtx reg, machine_mode mode)
else if (TARGET_VSX && size >= 16
&& (VECTOR_MODE_P (mode)
- || FLOAT128_VECTOR_P (mode)
+ || VECTOR_ALIGNMENT_P (mode)
|| mode == TImode || mode == CTImode))
return NON_PREFIXED_DQ;
@@ -26494,6 +26664,45 @@ rs6000_cannot_substitute_mem_equiv_p (rtx mem)
return false;
}
+/* Implement TARGET_INVALID_CONVERSION. */
+
+static const char *
+rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+ if (element_mode (fromtype) != element_mode (totype))
+ {
+ /* Do not allow conversions to/from PXImode and POImode types. */
+ if (TYPE_MODE (fromtype) == PXImode)
+ return N_("invalid conversion from type %<__vector_quad%>");
+ if (TYPE_MODE (totype) == PXImode)
+ return N_("invalid conversion to type %<__vector_quad%>");
+ if (TYPE_MODE (fromtype) == POImode)
+ return N_("invalid conversion from type %<__vector_pair%>");
+ if (TYPE_MODE (totype) == POImode)
+ return N_("invalid conversion to type %<__vector_pair%>");
+ }
+ else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
+ {
+ /* Do not allow conversions to/from PXImode and POImode pointer
+ types, except to/from void pointers. */
+ if (TYPE_MODE (TREE_TYPE (fromtype)) == PXImode
+ && TYPE_MODE (TREE_TYPE (totype)) != VOIDmode)
+ return N_("invalid conversion from type %<* __vector_quad%>");
+ if (TYPE_MODE (TREE_TYPE (totype)) == PXImode
+ && TYPE_MODE (TREE_TYPE (fromtype)) != VOIDmode)
+ return N_("invalid conversion to type %<* __vector_quad%>");
+ if (TYPE_MODE (TREE_TYPE (fromtype)) == POImode
+ && TYPE_MODE (TREE_TYPE (totype)) != VOIDmode)
+ return N_("invalid conversion from type %<* __vector_pair%>");
+ if (TYPE_MODE (TREE_TYPE (totype)) == POImode
+ && TYPE_MODE (TREE_TYPE (fromtype)) != VOIDmode)
+ return N_("invalid conversion to type %<* __vector_pair%>");
+ }
+
+ /* Conversion allowed. */
+ return NULL;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"
@@ -522,6 +522,7 @@ extern int rs6000_vector_align[];
#define MASK_HTM OPTION_MASK_HTM
#define MASK_ISEL OPTION_MASK_ISEL
#define MASK_MFCRF OPTION_MASK_MFCRF
+#define MASK_MMA OPTION_MASK_MMA
#define MASK_MULHW OPTION_MASK_MULHW
#define MASK_MULTIPLE OPTION_MASK_MULTIPLE
#define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE
@@ -776,7 +777,7 @@ extern unsigned rs6000_pointer_size;
#define FUNCTION_BOUNDARY 32
/* No data type wants to be aligned rounder than this. */
-#define BIGGEST_ALIGNMENT 128
+#define BIGGEST_ALIGNMENT ((TARGET_MMA) ? 512 : 128)
/* Alignment of field after `int : 0' in a structure. */
#define EMPTY_FIELD_BOUNDARY 32
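Combined with the 256-/512-bit alignments set for POImode and PXImode in
rs6000_init_hard_regno_mode_ok, user code should observe the following (a
sketch, assuming -mcpu=future -mmma):

    _Static_assert (_Alignof (__vector_pair) == 32, "32-byte pair alignment");
    _Static_assert (_Alignof (__vector_quad) == 64, "64-byte quad alignment");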
@@ -1035,16 +1036,17 @@ enum data_align { align_abi, align_opt, align_both };
((MODE) == V4SFmode \
|| (MODE) == V2DFmode)
-/* Note KFmode and possibly TFmode (i.e. IEEE 128-bit floating point) are not
- really a vector, but we want to treat it as a vector for moves, and
- such. */
+/* Modes that are not vectors, but require vector alignment. Treat these like
+ vectors in terms of loads and stores. */
+#define VECTOR_ALIGNMENT_P(MODE) \
+ (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
#define ALTIVEC_VECTOR_MODE(MODE) \
((MODE) == V16QImode \
|| (MODE) == V8HImode \
|| (MODE) == V4SFmode \
|| (MODE) == V4SImode \
- || FLOAT128_VECTOR_P (MODE))
+ || VECTOR_ALIGNMENT_P (MODE))
#define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \
(ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \
@@ -2309,6 +2311,7 @@ extern int frame_pointer_needed;
#define RS6000_BTM_POWERPC64 MASK_POWERPC64 /* 64-bit registers. */
#define RS6000_BTM_FLOAT128 MASK_FLOAT128_KEYWORD /* IEEE 128-bit float. */
#define RS6000_BTM_FLOAT128_HW MASK_FLOAT128_HW /* IEEE 128-bit float h/w. */
+#define RS6000_BTM_MMA MASK_MMA /* ISA 3.1 MMA. */
#define RS6000_BTM_FUTURE MASK_FUTURE
@@ -2331,7 +2334,9 @@ extern int frame_pointer_needed;
| RS6000_BTM_LDBL128 \
| RS6000_BTM_POWERPC64 \
| RS6000_BTM_FLOAT128 \
- | RS6000_BTM_FLOAT128_HW)
+ | RS6000_BTM_FLOAT128_HW \
+ | RS6000_BTM_MMA \
+ | RS6000_BTM_FUTURE)
/* Define builtin enum index. */
@@ -2443,6 +2448,8 @@ enum rs6000_builtin_type_index
RS6000_BTI_ieee128_float, /* ieee 128-bit floating point */
RS6000_BTI_ibm128_float, /* IBM 128-bit floating point */
RS6000_BTI_const_str, /* pointer to const char * */
+ RS6000_BTI_vector_pair, /* unsigned 256-bit types (vector pair). */
+ RS6000_BTI_vector_quad, /* unsigned 512-bit types (vector quad). */
RS6000_BTI_MAX
};
@@ -2495,6 +2502,8 @@ enum rs6000_builtin_type_index
#define ieee128_float_type_node (rs6000_builtin_types[RS6000_BTI_ieee128_float])
#define ibm128_float_type_node (rs6000_builtin_types[RS6000_BTI_ibm128_float])
#define const_str_type_node (rs6000_builtin_types[RS6000_BTI_const_str])
+#define vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_vector_pair])
+#define vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_vector_quad])
extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX];
extern GTY(()) tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
@@ -322,7 +322,7 @@ (define_attr "cpu"
(const (symbol_ref "(enum attr_cpu) rs6000_tune")))
;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9v,p9kf,p9tf,fut"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9v,p9kf,p9tf,fut,mma"
(const_string "any"))
;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -366,6 +366,10 @@ (define_attr "enabled" ""
(and (eq_attr "isa" "fut")
(match_test "TARGET_FUTURE"))
(const_int 1)
+
+ (and (eq_attr "isa" "mma")
+ (match_test "TARGET_MMA"))
+ (const_int 1)
] (const_int 0)))
;; If this instruction is microcoded on the CELL processor
@@ -772,7 +776,8 @@ (define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v")
;; Reload iterator for creating the function to allocate a base register to
;; supplement addressing modes.
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
- SF SD SI DF DD DI TI PTI KF IF TF])
+ SF SD SI DF DD DI TI PTI KF IF TF
+ POI PXI])
;; Iterate over smin, smax
(define_code_iterator fp_minmax [smin smax])
@@ -14866,6 +14871,7 @@ (define_insn "*cmpeqb_internal"
(include "vector.md")
(include "vsx.md")
(include "altivec.md")
+(include "mma.md")
(include "dfp.md")
(include "crypto.md")
(include "htm.md")
@@ -578,3 +578,7 @@ Generate (do not generate) prefixed memory instructions.
mpcrel
Target Report Mask(PCREL) Var(rs6000_isa_flags)
Generate (do not generate) pc-relative memory addressing.
+
+mmma
+Target Report Mask(MMA) Var(rs6000_isa_flags)
+Generate (do not generate) MMA instructions.
@@ -83,6 +83,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/vector.md \
$(srcdir)/config/rs6000/vsx.md \
$(srcdir)/config/rs6000/altivec.md \
+ $(srcdir)/config/rs6000/mma.md \
$(srcdir)/config/rs6000/crypto.md \
$(srcdir)/config/rs6000/htm.md \
$(srcdir)/config/rs6000/dfp.md
@@ -1201,7 +1201,7 @@ See RS/6000 and PowerPC Options.
-mgnu-attribute -mno-gnu-attribute @gol
-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol
-mstack-protector-guard-offset=@var{offset} -mprefixed -mno-prefixed @gol
--mpcrel -mno-pcrel}
+-mpcrel -mno-pcrel -mmma -mno-mmma}
@emph{RX Options}
@gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol
@@ -25940,7 +25940,8 @@ following options:
-mpowerpc-gpopt -mpowerpc-gfxopt @gol
-mmulhw -mdlmzb -mmfpgpr -mvsx @gol
-mcrypto -mhtm -mpower8-fusion -mpower8-vector @gol
--mquad-memory -mquad-memory-atomic -mfloat128 -mfloat128-hardware}
+-mquad-memory -mquad-memory-atomic -mfloat128 @gol
+-mfloat128-hardware -mprefixed -mpcrel -mmma}
The particular options set for any particular CPU varies between
compiler versions, depending on what setting seems to produce optimal
@@ -26936,6 +26937,13 @@ addressing (@option{-mprefixed}) options are enabled.
@opindex mno-prefixed
Generate (do not generate) addressing modes using prefixed load and
store instructions when the option @option{-mcpu=future} is used.
+
+@item -mmma
+@itemx -mno-mma
+@opindex mmma
+@opindex mno-mma
+Generate (do not generate) the MMA instructions when the option
+@option{-mcpu=future} is used.
@end table
@node RX Options