diff mbox series

[RFC,1/4] Hard register constraints

Message ID 20240910142121.3285492-2-stefansf@gcc.gnu.org
State New
Headers show
Series Hard Register Constraints | expand

Commit Message

Stefan Schulze Frielinghaus Sept. 10, 2024, 2:20 p.m. UTC
Implement hard register constraints of the form {regname} where regname
must be any valid register name for the target.  Such constraints may be
used in asm statements as a replacement for register asm and in machine
descriptions.

Due to optimizations it is not unexpected that two or more inputs which
require the same value also share a common pseudo.  However, this in
turn may lead to unsatisfiable asm where multiple inputs with different
hard register constraints share the same pseudo.  Therefore, we have to
introduce copies of such a pseudo and use these for conflicting inputs.
This is done prior to RA during asmcons in match_asm_constraints_2().
While IRA tries to reduce live ranges, it also replaces some
register-register moves.  That in turn might undo those copies of a
pseudo which we just introduced during asmcons.  Thus, check in
decrease_live_ranges_number() via valid_replacement_for_asm_input_p()
whether it is valid to perform a replacement.

The remainder of the patch mostly deals with parsing and decoding hard
register constraints.  The actual work is done by LRA in
process_alt_operands() where a register filter, according to the
constraint, is installed.

For the sake of "reviewability" and in order to show the beauty of LRA,
error handling (which gets pretty involved) is spread out into a
subsequent patch.

Limitation: Currently, a fixed register cannot be used as a hard register
constraint.  For example, accessing the stack pointer on x86_64 via

void *
foo (void)
{
  void *y;
  __asm__ ("" : "={rsp}" (y));
  return y;
}

leads to an error.  This is unfortunate since register asm does not have
this limitation.  The culprit seems to be that during reload
ira_class_hard_regs_num[rclass] does not even include fixed registers
which is why lra_assign() ultimately fails.  Does anyone have an idea
how to lift this limitation?  Maybe there is even a shortcut in order to
force a pseudo into a hard reg?
---
 gcc/function.cc                       | 116 ++++++++++++++++++++++++++
 gcc/genoutput.cc                      |  14 ++++
 gcc/genpreds.cc                       |   4 +-
 gcc/ira.cc                            |  79 +++++++++++++++++-
 gcc/lra-constraints.cc                |  13 +++
 gcc/recog.cc                          |  11 ++-
 gcc/stmt.cc                           |  39 +++++++++
 gcc/stmt.h                            |   1 +
 gcc/testsuite/gcc.dg/asm-hard-reg-1.c |  85 +++++++++++++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-2.c |  33 ++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-3.c |  25 ++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-4.c |  50 +++++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-5.c |  36 ++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-6.c |  60 +++++++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-7.c |  41 +++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-8.c |  49 +++++++++++
 16 files changed, 653 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-1.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-2.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-3.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-4.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-5.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-6.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-7.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-8.c
diff mbox series

Patch

diff --git a/gcc/function.cc b/gcc/function.cc
index a6f6de34942..bf5992f2b06 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -6974,6 +6974,115 @@  match_asm_constraints_1 (rtx_insn *insn, rtx *p_sets, int noutputs)
     df_insn_rescan (insn);
 }
 
+/* It is expected and desired that optimizations coalesce multiple pseudos into
+   one whenever possible.  However, in case of hard register constraints we may
+   have to undo this and introduce copies since otherwise we could constrain a
+   single pseudo to different hard registers.  For example, during register
+   allocation the following insn would be unsatisfiable since pseudo 60 is
+   constrained to hard register r5 and r6 at the same time.
+
+   (insn 7 5 0 2 (asm_operands/v ("foo") ("") 0 [
+	       (reg:DI 60) repeated x2
+	   ]
+	    [
+	       (asm_input:DI ("{r5}") t.c:4)
+	       (asm_input:DI ("{r6}") t.c:4)
+	   ]
+	    [] t.c:4) "t.c":4:3 -1
+	(expr_list:REG_DEAD (reg:DI 60)
+	   (nil)))
+
+   Therefore, introduce a copy of pseudo 60 and transform it into
+
+   (insn 10 5 7 2 (set (reg:DI 62)
+	   (reg:DI 60)) "t.c":4:3 1503 {*movdi_64}
+	(nil))
+   (insn 7 10 11 2 (asm_operands/v ("foo") ("") 0 [
+	       (reg:DI 60)
+	       (reg:DI 62)
+	   ]
+	    [
+	       (asm_input:DI ("{r5}") t.c:4)
+	       (asm_input:DI ("{r6}") t.c:4)
+	   ]
+	    [] t.c:4) "t.c":4:3 -1
+	(expr_list:REG_DEAD (reg:DI 62)
+	   (expr_list:REG_DEAD (reg:DI 60)
+	       (nil))))
+
+   Now, LRA can assign pseudo 60 to r5, and pseudo 62 to r6.
+
+   TODO: The current implementation is conservative and we could do a bit
+   better in case of alternatives.  For example
+
+   (insn 7 5 0 2 (asm_operands/v ("foo") ("") 0 [
+	       (reg:DI 60) repeated x2
+	   ]
+	    [
+	       (asm_input:DI ("r,{r5}") t.c:4)
+	       (asm_input:DI ("{r6},r") t.c:4)
+	   ]
+	    [] t.c:4) "t.c":4:3 -1
+	(expr_list:REG_DEAD (reg:DI 60)
+	   (nil)))
+
+   For this insn we wouldn't need to come up with a copy of pseudo 60 since in
+   each alternative pseudo 60 is constrained exactly one time.  */
+
+static void
+match_asm_constraints_2 (rtx_insn *insn, rtx pat)
+{
+  /* Dig out the ASM_OPERANDS, which is either PAT itself or the source of
+     a SET; every other pattern is left alone.  */
+  rtx asmops = pat;
+  if (GET_CODE (pat) == SET)
+    asmops = SET_SRC (pat);
+  if (GET_CODE (asmops) != ASM_OPERANDS)
+    return;
+
+  int ninputs = ASM_OPERANDS_INPUT_LENGTH (asmops);
+  rtvec operands = ASM_OPERANDS_INPUT_VEC (asmops);
+  auto_bitmap seen_regs;
+  bool rescan_p = false;
+
+  for (int k = 0; k < ninputs; ++k)
+    {
+      /* Inputs without a hard register constraint are of no interest.  */
+      if (strchr (ASM_OPERANDS_INPUT_CONSTRAINT (asmops, k), '{') == nullptr)
+	continue;
+
+      /* Likewise for anything which is neither a register nor a subreg of
+	 a register, and for hard registers used directly.  */
+      rtx operand = RTVEC_ELT (operands, k);
+      rtx reg = SUBREG_P (operand) ? SUBREG_REG (operand) : operand;
+      if (!REG_P (reg) || (REG_P (operand) && HARD_REGISTER_P (operand)))
+	continue;
+      int regno = REGNO (reg);
+
+      /* Keep the first usage of a constrained pseudo as is and only
+	 introduce copies for subsequent usages.  */
+      if (!bitmap_bit_p (seen_regs, regno))
+	bitmap_set_bit (seen_regs, regno);
+      else
+	{
+	  /* Move the value into a fresh pseudo right before INSN and let the
+	     asm use that pseudo instead.  */
+	  rtx copy = gen_reg_rtx (GET_MODE (operand));
+	  start_sequence ();
+	  emit_move_insn (copy, operand);
+	  rtx_insn *seq = get_insns ();
+	  end_sequence ();
+	  emit_insn_before (seq, insn);
+	  RTVEC_ELT (operands, k) = copy;
+	  rescan_p = true;
+	}
+    }
+
+  /* At least one input operand was replaced, hence rescan INSN.  */
+  if (rescan_p)
+    df_insn_rescan (insn);
+}
+
 /* Add the decl D to the local_decls list of FUN.  */
 
 void
@@ -7030,6 +7139,13 @@  pass_match_asm_constraints::execute (function *fun)
 	    continue;
 
 	  pat = PATTERN (insn);
+
+	  if (GET_CODE (pat) == PARALLEL)
+	    for (int i = XVECLEN (pat, 0) - 1; i >= 0; --i)
+	      match_asm_constraints_2 (insn, XVECEXP (pat, 0, i));
+	  else
+	    match_asm_constraints_2 (insn, pat);
+
 	  if (GET_CODE (pat) == PARALLEL)
 	    p_sets = &XVECEXP (pat, 0, 0), noutputs = XVECLEN (pat, 0);
 	  else if (GET_CODE (pat) == SET)
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index 16fd811b5dd..2ffb2fb28d2 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -1284,6 +1284,20 @@  mdep_constraint_len (const char *s, file_location loc, int opno)
       if (!strncmp (s, p->name, p->namelen))
 	return p->namelen;
 
+  if (*s == '{')
+    {
+      const char *end = s + 1;
+      while (*end != '}' && *end != '"' && *end != '\0')
+	++end;
+      /* Similarly as in decode_hard_reg_constraint(), consider any hard
+	 register name longer than a few characters as an error.  */
+      ptrdiff_t len = end - s;
+      if (*end == '}' && len > 1 && len < 31)
+	{
+	  return len + 1;
+	}
+    }
+
   error_at (loc, "error: undefined machine-specific constraint "
 	    "at this point: \"%s\"", s);
   message_at (loc, "note:  in operand %d", opno);
diff --git a/gcc/genpreds.cc b/gcc/genpreds.cc
index 55d149e8a40..0777cb7a4db 100644
--- a/gcc/genpreds.cc
+++ b/gcc/genpreds.cc
@@ -1148,7 +1148,7 @@  write_insn_constraint_len (void)
   unsigned int i;
 
   puts ("static inline size_t\n"
-	"insn_constraint_len (char fc, const char *str ATTRIBUTE_UNUSED)\n"
+	"insn_constraint_len (char fc, const char *str)\n"
 	"{\n"
 	"  switch (fc)\n"
 	"    {");
@@ -1181,6 +1181,8 @@  write_insn_constraint_len (void)
 
   puts ("    default: break;\n"
 	"    }\n"
+	"  if (str[0] == '{')\n"
+	"      return ((const char *) rawmemchr (str + 1, '}') - str) + 1;\n"
 	"  return 1;\n"
 	"}\n");
 }
diff --git a/gcc/ira.cc b/gcc/ira.cc
index 156541df4e6..d17d70fd277 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -2128,6 +2128,82 @@  ira_get_dup_out_num (int op_num, alternative_mask alts,
 
 
 
+/* Return true if a replacement of SRC by DEST does not lead to unsatisfiable
+   asm.  Thus, a replacement is valid if and only if SRC and DEST are not
+   constrained in asm inputs of a single asm statement.  See
+   match_asm_constraints_2() for more details.  TODO: As in
+   match_asm_constraints_2() consider alternatives more precisely.  */
+
+static bool
+valid_replacement_for_asm_input_p_1 (const_rtx asmops, const_rtx src, const_rtx dest)
+{
+  /* The replacement is rejected exactly when some input equal to SRC and some
+     input equal to DEST both carry a hard register constraint.  Both
+     conditions are independent of each other, hence collect them in a single
+     walk over the inputs.  */
+  bool src_constrained_p = false;
+  bool dest_constrained_p = false;
+  rtvec operands = ASM_OPERANDS_INPUT_VEC (asmops);
+  int noperands = ASM_OPERANDS_INPUT_LENGTH (asmops);
+
+  for (int k = 0; k < noperands; ++k)
+    {
+      if (strchr (ASM_OPERANDS_INPUT_CONSTRAINT (asmops, k), '{') == nullptr)
+	continue;
+      rtx operand = RTVEC_ELT (operands, k);
+      if (rtx_equal_p (operand, src))
+	src_constrained_p = true;
+      if (rtx_equal_p (operand, dest))
+	dest_constrained_p = true;
+    }
+  return !(src_constrained_p && dest_constrained_p);
+}
+
+static bool
+valid_replacement_for_asm_input_p (const_rtx src, const_rtx dest)
+{
+  /* Bail out early if there is no asm statement.  */
+  if (!crtl->has_asm_statement)
+    return true;
+
+  df_ref ref = DF_REG_USE_CHAIN (REGNO (src));
+  for (; ref != nullptr; ref = DF_REF_NEXT_REG (ref))
+    {
+      /* Only real uses come with insn info; artificial ones are skipped.  */
+      if (DF_REF_INSN_INFO (ref) == nullptr)
+	continue;
+      /* Skip insns for which asm_noperands reports no operands.  */
+      rtx body = PATTERN (DF_REF_INSN (ref));
+      if (asm_noperands (body) <= 0)
+	continue;
+      switch (GET_CODE (body))
+	{
+	case SET:
+	  if (!valid_replacement_for_asm_input_p_1 (SET_SRC (body), src, dest))
+	    return false;
+	  break;
+	case ASM_OPERANDS:
+	  if (!valid_replacement_for_asm_input_p_1 (body, src, dest))
+	    return false;
+	  break;
+	case PARALLEL:
+	  for (int k = 0, n = XVECLEN (body, 0); k < n; ++k)
+	    {
+	      rtx elt = XVECEXP (body, 0, k);
+	      if (GET_CODE (elt) == SET)
+		elt = SET_SRC (elt);
+	      if (GET_CODE (elt) == ASM_OPERANDS
+		  && !valid_replacement_for_asm_input_p_1 (elt, src, dest))
+		return false;
+	    }
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+  return true;
+}
+
 /* Search forward to see if the source register of a copy insn dies
    before either it or the destination register is modified, but don't
    scan past the end of the basic block.  If so, we can replace the
@@ -2177,7 +2253,8 @@  decrease_live_ranges_number (void)
 	       auto-inc memory reference, so we must disallow this
 	       optimization on them.  */
 	    || sregno == STACK_POINTER_REGNUM
-	    || dregno == STACK_POINTER_REGNUM)
+	    || dregno == STACK_POINTER_REGNUM
+	    || !valid_replacement_for_asm_input_p (src, dest))
 	  continue;
 	
 	dest_death = NULL_RTX;
diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index fdcc07764a2..5d95072b8f8 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -114,6 +114,7 @@ 
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stmt.h"
 #include "predict.h"
 #include "df.h"
 #include "memmodel.h"
@@ -2165,6 +2166,7 @@  process_alt_operands (int only_alternative)
   bool costly_p;
   enum reg_class cl;
   const HARD_REG_SET *cl_filter;
+  HARD_REG_SET hard_reg_constraint;
 
   /* Calculate some data common for all alternatives to speed up the
      function.	*/
@@ -2536,6 +2538,17 @@  process_alt_operands (int only_alternative)
 		  cl_filter = nullptr;
 		  goto reg;
 
+		case '{':
+		    {
+		      int regno = decode_hard_reg_constraint (p);
+		      gcc_assert (regno >= 0);
+		      cl = REGNO_REG_CLASS (regno);
+		      CLEAR_HARD_REG_SET (hard_reg_constraint);
+		      SET_HARD_REG_BIT (hard_reg_constraint, regno);
+		      cl_filter = &hard_reg_constraint;
+		      goto reg;
+		    }
+
 		default:
 		  cn = lookup_constraint (p);
 		  switch (get_constraint_type (cn))
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 615aaabc551..c95ac0bdfa0 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -25,6 +25,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stmt.h"
 #include "cfghooks.h"
 #include "df.h"
 #include "memmodel.h"
@@ -2367,7 +2368,8 @@  asm_operand_ok (rtx op, const char *constraint, const char **constraints)
 	    {
 	    case CT_REGISTER:
 	      if (!result
-		  && reg_class_for_constraint (cn) != NO_REGS
+		  && (reg_class_for_constraint (cn) != NO_REGS
+		      || constraint[0] == '{')
 		  && GET_MODE (op) != BLKmode
 		  && register_operand (op, VOIDmode))
 		result = 1;
@@ -3301,6 +3303,13 @@  constrain_operands (int strict, alternative_mask alternatives)
 		  win = true;
 		break;
 
+	      case '{':
+		if ((REG_P (op) && HARD_REGISTER_P (op)
+		     && (int) REGNO (op) == decode_hard_reg_constraint (p))
+		    || !reload_completed)
+		  win = true;
+		break;
+
 	      default:
 		{
 		  enum constraint_num cn = lookup_constraint (p);
diff --git a/gcc/stmt.cc b/gcc/stmt.cc
index ae1527f0a19..915969ee116 100644
--- a/gcc/stmt.cc
+++ b/gcc/stmt.cc
@@ -52,6 +52,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "dumpfile.h"
 #include "builtins.h"
+#include "output.h"
 
 
 /* Functions and data structures for expanding case statements.  */
@@ -174,6 +175,32 @@  expand_label (tree label)
     maybe_set_first_label_num (label_r);
 }
 
+/* Parse a hard register constraint.  BEGIN should point to a string of the
+   form `{regname}`.  Return -1 if the braces are malformed, the name is empty,
+   or the name is longer than 31 characters (a limit chosen for the sake of
+   simplicity); otherwise return what decode_reg_name yields for the name.  */
+
+int
+decode_hard_reg_constraint (const char *begin)
+{
+  /* Holds the longest supported name (31 characters) plus the NUL.  */
+  char regname[32];
+  if (*begin != '{')
+    return -1;
+  ++begin;
+  const char *end = begin;
+  while (*end != '}' && *end != '\0')
+    ++end;
+  if (*end != '}' || end == begin)
+    return -1;
+  ptrdiff_t len = end - begin;
+  if (len >= (ptrdiff_t) sizeof (regname))
+    return -1;
+  memcpy (regname, begin, len);
+  regname[len] = '\0';
+  return decode_reg_name (regname);
+}
+
 /* Parse the output constraint pointed to by *CONSTRAINT_P.  It is the
    OPERAND_NUMth output operand, indexed from zero.  There are NINPUTS
    inputs and NOUTPUTS outputs to this extended-asm.  Upon return,
@@ -289,6 +316,12 @@  parse_output_constraint (const char **constraint_p, int operand_num,
 	  *allows_mem = true;
 	  break;
 
+	case '{':
+	  {
+	    *allows_reg = true;
+	    break;
+	  }
+
 	default:
 	  if (!ISALPHA (*p))
 	    break;
@@ -408,6 +441,12 @@  parse_input_constraint (const char **constraint_p, int input_num,
 	*allows_mem = true;
 	break;
 
+      case '{':
+	{
+	  *allows_reg = true;
+	  break;
+	}
+
       default:
 	if (! ISALPHA (constraint[j]))
 	  {
diff --git a/gcc/stmt.h b/gcc/stmt.h
index a2caae7121b..7d79d682645 100644
--- a/gcc/stmt.h
+++ b/gcc/stmt.h
@@ -25,6 +25,7 @@  extern bool parse_output_constraint (const char **, int, int, int,
 				     bool *, bool *, bool *);
 extern bool parse_input_constraint (const char **, int, int, int, int,
 				    const char * const *, bool *, bool *);
+extern int decode_hard_reg_constraint (const char *);
 extern tree resolve_asm_operand_names (tree, tree, tree, tree);
 #ifdef HARD_CONST
 /* Silly ifdef to avoid having all includers depend on hard-reg-set.h.  */
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
new file mode 100644
index 00000000000..6a5a9ada45f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
@@ -0,0 +1,85 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+
+#if defined (__aarch64__)
+# define GPR "{x4}"
+/* { dg-final { scan-assembler-times "foo\tx4" 8 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\tr4" 8 { target { arm*-*-* } } } } */
+#elif defined (__i386__)
+# define GPR "{ecx}"
+/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { i?86-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { i?86-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%ecx" 4 { target { i?86-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR "{r5}"
+/* { dg-final { scan-assembler-times "foo\t5" 8 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR "{t5}"
+/* { dg-final { scan-assembler-times "foo\tt5" 8 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\t%r4" 8 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR "{rcx}"
+/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%ecx" 2 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
+#endif
+
+char
+test_char (char x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+char
+test_char_from_mem (char *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
+
+short
+test_short (short x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+short
+test_short_from_mem (short *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
+
+int
+test_int (int x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+int
+test_int_from_mem (int *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
+
+long
+test_long (long x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+long
+test_long_from_mem (long *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
new file mode 100644
index 00000000000..7dabf9657cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-std=c99" } we need long long */
+
+#if defined (__aarch64__)
+# define GPR "{x4}"
+/* { dg-final { scan-assembler-times "foo\tx4" 2 { target { aarch64*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR "{r5}"
+/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR "{t5}"
+/* { dg-final { scan-assembler-times "foo\tt5" 2 { target { riscv64-*-* } } } } */
+#elif defined (__s390__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\t%r4" 2 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR "{rcx}"
+/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
+#endif
+
+long long
+test_longlong (long long x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+long long
+test_longlong_from_mem (long long *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
new file mode 100644
index 00000000000..fa4472ae8a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile { target { { aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } && int128 } } } */
+/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
+
+#if defined (__aarch64__)
+# define REG "{x4}"
+/* { dg-final { scan-assembler-times "foo\tx4" 1 { target { aarch64*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define REG "{r5}"
+/* { dg-final { scan-assembler-times "foo\t5" 1 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define REG "{t5}"
+/* { dg-final { scan-assembler-times "foo\tt5" 1 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define REG "{r4}"
+/* { dg-final { scan-assembler-times "foo\t%r4" 1 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define REG "{xmm0}"
+/* { dg-final { scan-assembler-times "foo\t%xmm0" 1 { target { x86_64-*-* } } } } */
+#endif
+
+void
+test (void)
+{
+  __asm__ ("foo\t%0" :: REG ((__int128) 42));
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-4.c b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
new file mode 100644
index 00000000000..0816df8f719
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
@@ -0,0 +1,50 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+
+#if defined (__aarch64__)
+# define FPR "{d5}"
+/* { dg-final { scan-assembler-times "foo\tv5" 4 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define FPR "{d5}"
+/* { dg-additional-options "-march=armv7-a+fp -mfloat-abi=hard" { target arm*-*-* } } */
+/* { dg-final { scan-assembler-times "foo\ts10" 4 { target { arm*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define FPR "{5}"
+/* { dg-final { scan-assembler-times "foo\t5" 4 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define FPR "{f5}"
+/* { dg-final { scan-assembler-times "foo\tf5" 4 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define FPR "{f5}"
+/* { dg-final { scan-assembler-times "foo\t%f5" 4 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define FPR "{xmm5}"
+/* { dg-final { scan-assembler-times "foo\t%xmm5" 4 { target { x86_64-*-* } } } } */
+#endif
+
+float
+test_float (float x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (x));
+  return x;
+}
+
+float
+test_float_from_mem (float *x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (*x));
+  return *x;
+}
+
+double
+test_double (double x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (x));
+  return x;
+}
+
+double
+test_double_from_mem (double *x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-5.c b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
new file mode 100644
index 00000000000..a9e25ce1746
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+
+typedef int V __attribute__ ((vector_size (4 * sizeof (int))));
+
+#if defined (__aarch64__)
+# define VR "{v20}"
+/* { dg-final { scan-assembler-times "foo\tv20" 2 { target { aarch64*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define VR "{v5}"
+/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
+#elif defined (__riscv)
+# define VR "{v5}"
+/* { dg-additional-options "-march=rv64imv" { target riscv64-*-* } } */
+/* { dg-final { scan-assembler-times "foo\tv5" 2 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define VR "{v5}"
+/* { dg-require-effective-target s390_mvx { target s390*-*-* } } */
+/* { dg-final { scan-assembler-times "foo\t%v5" 2 { target s390*-*-* } } } */
+#elif defined (__x86_64__)
+# define VR "{xmm9}"
+/* { dg-final { scan-assembler-times "foo\t%xmm9" 2 { target { x86_64-*-* } } } } */
+#endif
+
+V
+test (V x)
+{
+  __asm__ ("foo\t%0" : "+"VR (x));
+  return x;
+}
+
+V
+test_from_mem (V *x)
+{
+  __asm__ ("foo\t%0" : "+"VR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-6.c b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
new file mode 100644
index 00000000000..d9b7fae8097
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
@@ -0,0 +1,60 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+/* Test multiple alternatives.  */
+
+#if defined (__aarch64__)
+# define GPR1 "{x1}"
+# define GPR2 "{x2}"
+# define GPR3 "{x3}"
+/* { dg-final { scan-assembler-times "foo\tx1,x3" 1 { target { aarch64*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\tx2,\\\[x1\\\]" 1 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define GPR1 "{r1}"
+# define GPR2 "{r2}"
+# define GPR3 "{r3}"
+/* { dg-final { scan-assembler-times "foo\tr1,r3" 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\tr2,\\\[r1\\\]" 1 { target { arm*-*-* } } } } */
+#elif defined (__i386__)
+# define GPR1 "{eax}"
+# define GPR2 "{ebx}"
+# define GPR3 "{ecx}"
+/* { dg-final { scan-assembler-times "foo\t4\\(%esp\\),%ecx" 1 { target { i?86-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%eax\\)" 1 { target { i?86-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR1 "{r4}"
+# define GPR2 "{r5}"
+# define GPR3 "{r6}"
+/* { dg-final { scan-assembler-times "foo\t4,6" 1 { target { powerpc*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t5,0\\(4\\)" 1 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR1 "{t1}"
+# define GPR2 "{t2}"
+# define GPR3 "{t3}"
+/* { dg-final { scan-assembler-times "foo\tt1,t3" 1 { target { riscv*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\tt2,0\\(a1\\)" 1 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define GPR1 "{r0}"
+# define GPR2 "{r1}"
+# define GPR3 "{r2}"
+/* { dg-final { scan-assembler-times "foo\t%r0,%r2" 1 { target { s390*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t%r1,0\\(%r3\\)" 1 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR1 "{eax}"
+# define GPR2 "{ebx}"
+# define GPR3 "{rcx}"
+/* { dg-final { scan-assembler-times "foo\t%eax,%rcx" 1 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%rsi\\)" 1 { target { x86_64-*-* } } } } */
+#endif
+
+void
+test_reg_reg (int x, long long *y)
+{
+  __asm__ ("foo\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (y));
+}
+
+void
+test_reg_mem (int x, long long *y)
+{
+  __asm__ ("bar\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (*y));
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-7.c b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
new file mode 100644
index 00000000000..761a6b77d34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
@@ -0,0 +1,41 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+/* Test multiple alternatives.  */
+
+#if defined (__aarch64__)
+# define GPR "{x1}"
+/* { dg-final { scan-assembler-times "foo\tx1,x1" 2 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define GPR "{r1}"
+/* { dg-final { scan-assembler-times "foo\tr1,r1" 2 { target { arm*-*-* } } } } */
+#elif defined (__i386__)
+# define GPR "{eax}"
+/* { dg-final { scan-assembler-times "foo\t%eax,%eax" 2 { target { i?86-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\t4,4" 2 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR "{t1}"
+/* { dg-final { scan-assembler-times "foo\tt1,t1" 2 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define GPR "{r0}"
+/* { dg-final { scan-assembler-times "foo\t%r0,%r0" 2 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR "{eax}"
+/* { dg-final { scan-assembler-times "foo\t%eax,%eax" 2 { target { x86_64-*-* } } } } */
+#endif
+
+int
+test_1 (int x)
+{
+  __asm__ ("foo\t%0,%0" : "+"GPR (x));
+  return x;
+}
+
+int
+test_2 (int x, int y)
+{
+  __asm__ ("foo\t%0,%1" : "="GPR (x) : GPR (y));
+  return x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-8.c b/gcc/testsuite/gcc.dg/asm-hard-reg-8.c
new file mode 100644
index 00000000000..cda5e3e4c3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-8.c
@@ -0,0 +1,49 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+
+/* Due to hard register constraints, X must be copied.  */
+
+#if defined (__aarch64__)
+# define GPR1 "{x1}"
+# define GPR2 "{x2}"
+#elif defined (__arm__)
+# define GPR1 "{r1}"
+# define GPR2 "{r2}"
+#elif defined (__i386__)
+# define GPR1 "{eax}"
+# define GPR2 "{ebx}"
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR1 "{r4}"
+# define GPR2 "{r5}"
+#elif defined (__riscv)
+# define GPR1 "{t1}"
+# define GPR2 "{t2}"
+#elif defined (__s390__)
+# define GPR1 "{r0}"
+# define GPR2 "{r1}"
+#elif defined (__x86_64__)
+# define GPR1 "{eax}"
+# define GPR2 "{ebx}"
+#endif
+
+#define TEST(T) \
+int \
+test_##T (T x) \
+{ \
+  int out; \
+  __asm__ ("foo" : "=r" (out) : GPR1 (x), GPR2 (x)); \
+  return out; \
+}
+
+TEST(char)
+TEST(short)
+TEST(int)
+TEST(long)
+
+int
+test_subreg (long x)
+{
+  int out;
+  short subreg_x = x;
+  __asm__ ("foo" : "=r" (out) : GPR1 (x), GPR2 (subreg_x));
+  return out;
+}