diff mbox series

[v2] Hard register constraints

Message ID 20240805102857.2829431-2-stefansf@gcc.gnu.org
State New
Headers show
Series [v2] Hard register constraints | expand

Commit Message

Stefan Schulze Frielinghaus Aug. 5, 2024, 10:28 a.m. UTC
This is a follow-up of
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654013.html

What has changed?

- Rebased and fixed an issue in constrain_operands which manifested
after late-combine.

- Introduced new test cases for Arm, Intel, POWER, RISCV, S/390 for 32-
and 64-bit where appropriate (including register pairs etc.).  Test
gcc.dg/asm-hard-reg-7.c is a bit controversial since I'm testing for an
anti feature here, i.e., I'm testing for register asm in conjunction
with calls.  I'm fine with removing it in the end but I wanted to keep
it in for demonstration purposes at least during discussion of this
patch.

- Split test pr87600-2.c into pr87600-2.c and pr87600-3.c since test0
now errors out early.  Otherwise, the remaining errors would not be
reported.  Besides that, the error message has changed slightly.

- Modified genoutput.cc in order to allow hard register constraints in
machine descriptions.  For example, on s390 the instruction mvcrl makes
use of the implicit register r0 which we currently deal with as follows:

(define_insn "*mvcrl"
  [(set (match_operand:BLK 0 "memory_operand" "=Q")
       (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
                    (reg:SI GPR0_REGNUM)]
                   UNSPEC_MVCRL))]
  "TARGET_Z15"
  "mvcrl\t%0,%1"
  [(set_attr "op_type" "SSE")])

(define_expand "mvcrl"
  [(set (reg:SI GPR0_REGNUM) (match_operand:SI 2 "general_operand"))
   (set (match_operand:BLK 0 "memory_operand" "=Q")
       (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
                    (reg:SI GPR0_REGNUM)]
                   UNSPEC_MVCRL))]
  "TARGET_Z15"
  "")

In the expander we ensure that GPR0 is set up correctly.  With this patch
we could simply write

(define_insn "mvcrl"
  [(set (match_operand:BLK 0 "memory_operand" "=Q")
        (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
                     (match_operand:SI 2 "general_operand" "{r0}")]
                    UNSPEC_MVCRL))]
  "TARGET_Z15"
  "mvcrl\t%0,%1"
  [(set_attr "op_type" "SSE")])

What I dislike is that I didn't find a way to verify hard register names
during genoutput, i.e., to ensure that the name is actually valid.  This
is due to how reg_names is defined, which cannot be accessed by
genoutput.  The same holds true for REGISTER_NAMES et al., which may
reference some target-specific variable (see e.g. POWER).  Thus, in case
of an invalid register name in a machine description file we do not
end up with a genoutput-time error but instead fail at run time in
process_alt_operands():

   case '{':
       {
         int regno = parse_constraint_regname (p);
         gcc_assert (regno >= 0);
         cl = REGNO_REG_CLASS (regno);
         CLEAR_HARD_REG_SET (hregset);
         SET_HARD_REG_BIT (hregset, regno);
         cl_filter = &hregset;
         goto reg;
       }

This is rather unfortunate but I couldn't find a way to validate
register names during genoutput.  If no one else has an idea I will
replace gcc_assert with a more expressive error message.

What's next?

I was thinking about replacing register asm with the new hard register
constraint.  This would solve problems like demonstrated by
gcc.dg/asm-hard-reg-7.c.  For example, we could replace the constraint

   register int x asm ("r5") = 42;
   asm ("foo   %0" :: "r" (x));

with

   register int x asm ("r5") = 42;
   asm ("foo   %0" :: "{r5}" (x));

and ignore any further effect of the register asm.  However, I haven't
really thought this through and there are certainly cases which are
currently allowed which cannot trivially be converted as e.g. here:

   register int x asm ("r5") = 42;
   asm ("foo   %0" :: "rd" (x));

Multiple alternatives are kind of strange in combination with register
asm.  For example, on s390 the two constraints "r" and "d" both restrict
to GPRs.  That is not a showstopper but certainly something which needs
some consideration.  If you can think of some wild combinations/edge
cases I would be happy to hear about them.  Anyhow, this is something for a
further patch.

Last but not least, if there is enough consent to accept this feature, I
will start writing up some documentation.

Bootstrapped and regtested on Arm, Intel, POWER, RISCV, S/390.  I have
only verified the 32-bit tests via cross compilers and didn't execute
them in contrast to 64-bit targets.
---
 gcc/cfgexpand.cc                              |  42 -----
 gcc/genoutput.cc                              |  12 ++
 gcc/genpreds.cc                               |   4 +-
 gcc/gimplify.cc                               | 134 ++++++++++++++-
 gcc/lra-constraints.cc                        |  13 ++
 gcc/recog.cc                                  |  11 +-
 gcc/stmt.cc                                   | 155 +++++++++++++++++-
 gcc/stmt.h                                    |  12 +-
 gcc/testsuite/gcc.dg/asm-hard-reg-1.c         |  85 ++++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-2.c         |  33 ++++
 gcc/testsuite/gcc.dg/asm-hard-reg-3.c         |  25 +++
 gcc/testsuite/gcc.dg/asm-hard-reg-4.c         |  50 ++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-5.c         |  36 ++++
 gcc/testsuite/gcc.dg/asm-hard-reg-6.c         |  60 +++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-7.c         |  70 ++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c   |  67 ++++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c   |  19 +++
 gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c   |  20 +++
 gcc/testsuite/gcc.dg/pr87600-2.c              |  30 +---
 gcc/testsuite/gcc.dg/pr87600-3.c              |  35 ++++
 .../gcc.target/s390/asm-hard-reg-1.c          | 103 ++++++++++++
 .../gcc.target/s390/asm-hard-reg-2.c          |  43 +++++
 .../gcc.target/s390/asm-hard-reg-3.c          |  42 +++++
 gcc/testsuite/lib/scanasm.exp                 |   4 +
 24 files changed, 1020 insertions(+), 85 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-1.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-2.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-3.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-4.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-5.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-6.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-7.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
 create mode 100644 gcc/testsuite/gcc.dg/pr87600-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c

Comments

Georg-Johann Lay Aug. 5, 2024, 12:19 p.m. UTC | #1
Am 05.08.24 um 12:28 schrieb Stefan Schulze Frielinghaus:
> This is a follow-up of
> https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654013.html
> 
> What has changed?
> 
> - Rebased and fixed an issue in constrain_operands which manifested
> after late-combine.
> 
> - Introduced new test cases for Arm, Intel, POWER, RISCV, S/390 for 32-
> and 64-bit where appropriate (including register pairs etc.).  Test
> gcc.dg/asm-hard-reg-7.c is a bit controversial since I'm testing for an
> anti feature here, i.e., I'm testing for register asm in conjunction
> with calls.  I'm fine with removing it in the end but I wanted to keep
> it in for demonstration purposes at least during discussion of this
> patch.
> 
> - Split test pr87600-2.c into pr87600-2.c and pr87600-3.c since test0
> errors out early, now.  Otherwise, the remaining errors would not be
> reported.  Beside that the error message has slightly changed.
> 
> - Modified genoutput.cc in order to allow hard register constraints in
> machine descriptions.  For example, on s390 the instruction mvcrl makes

As I already said, such a feature would be great.  Some questions:

Which pass is satisfying that constraint? AFAIK for local reg vars,
it is asmcons, but for register constraints in md it is the register
allocator.

The avr backend has many insns that use explicit hard regs in order to
model some libcalls (ones with footprints smaller than ABI, or that
deviate from the ABI).  A proper way would be to add a register
constraint for each possible hard reg, e.g. R20_1 for QImode in R20,
R20_2 for HImode in R20, etc.  This would require a dozen or more
new register classes, and the problem with that is that register
allocation produces less efficient code even for cases that do
not use these new constraints.  So I gave up that approach.

How does your feature work? Does it imply that for each hreg
constraint there must be an according register class?

Obviously local reg vars don't require respective reg classes,
so I thought about representing such insns as asm_input or
whatever, but that's pure hack and would never pass a review...

> use of the implicit register r0 which we currently deal with as follows:
> 
> (define_insn "*mvcrl"
>    [(set (match_operand:BLK 0 "memory_operand" "=Q")
>         (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
>                      (reg:SI GPR0_REGNUM)]
>                     UNSPEC_MVCRL))]
>    "TARGET_Z15"
>    "mvcrl\t%0,%1"
>    [(set_attr "op_type" "SSE")])
> 
> (define_expand "mvcrl"
>    [(set (reg:SI GPR0_REGNUM) (match_operand:SI 2 "general_operand"))
>     (set (match_operand:BLK 0 "memory_operand" "=Q")
>         (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
>                      (reg:SI GPR0_REGNUM)]
>                     UNSPEC_MVCRL))]
>    "TARGET_Z15"
>    "")
> 
> In the expander we ensure that GPR0 is setup correctly.  With this patch
> we could simply write
> 
> (define_insn "mvcrl"
>    [(set (match_operand:BLK 0 "memory_operand" "=Q")
>          (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
>                       (match_operand:SI 2 "general_operand" "{r0}")]
>                      UNSPEC_MVCRL))]
>    "TARGET_Z15"
>    "mvcrl\t%0,%1"
>    [(set_attr "op_type" "SSE")])
> 
> What I dislike is that I didn't find a way to verify hard register names

Are plain register numbers also supported? Like "{0}" ?
(Provided regno(r0) == 0).

> during genoutput, i.e., ensuring that the name is valid after all.  This
> is due to the fact how reg_names is defined which cannot be accessed by
> genoutput.  The same holds true for REGISTER_NAMES et al. which may
> reference some target specific variable (see e.g. POWER).  Thus, in case
> of an invalid register name in a machine description file we do not
> end-up with a genoutput-time error but instead fail at run-time in
> process_alt_operands():
> 
>     case '{':
>         {
>           int regno = parse_constraint_regname (p);
>           gcc_assert (regno >= 0);
>           cl = REGNO_REG_CLASS (regno);
>           CLEAR_HARD_REG_SET (hregset);
>           SET_HARD_REG_BIT (hregset, regno);

Is this correct when hard_regno_nregs(regno) > 1,
i.e. when the register occupies more than one hard register?

>           cl_filter = &hregset;
>           goto reg;
>         }
> 
> This is rather unfortunate but I couldn't find a way how to validate
> register names during genoutput.  If no one else has an idea I will
> replace gcc_assert with a more expressive error message.

[ADDITIONAL_]REGISTER_NAMES isn't available?  Though using that might
bypass the effect of target hooks like TARGET_CONDITIONAL_REGISTER_USAGE.

But there are also cases with an asm operand print modifier; you cannot
check that, it's checked by TARGET_PRINT_OPERAND etc. which get a
hard register and not a string for a register name.

Maybe genoutput could add additional information to insn-output.cc or
whatever, and the compiler proper checks that and emits diagnostics
as needed?

> What's next?
> 
> I was thinking about replacing register asm with the new hard register
> constraint.  This would solve problems like demonstrated by
> gcc.dg/asm-hard-reg-7.c.  For example, we could replace the constraint
> 
>     register int x asm ("r5") = 42;
>     asm ("foo   %0" :: "r" (x));
> 
> with
> 
>     register int x asm ("r5") = 42;
>     asm ("foo   %0" :: "{r5}" (x));
> 
> and ignore any further effect of the register asm.  However, I haven't
> really thought this through and there are certainly cases which are
> currently allowed which cannot trivially be converted as e.g. here:
> 
>     register int x asm ("r5") = 42;
>     asm ("foo   %0" :: "rd" (x));
> 
> Multiple alternatives are kind of strange in combination with register
> asm.  For example, on s390 the two constraints "r" and "d" restrict both

Though in that example there is just one alternative "rd" which is the
union of "r" and "d".  Inline asm doesn't support more than one
constraint alternative.  Multiple alternatives would require multiple
asm templates, like in

asm ("code for r", "code for d" :: "r,d" (x));

And a final question: Is it possible to specify more than one hard
reg constraint like in "{r0}{r1}" as the union of r0 and r1?

Johann

> to GPRs.  That is not a show stopper but certainly something which needs
> some consideration.  If you can think of some wild combinations/edge
> cases I would be happy to here about.  Anyhow, this is something for a
> further patch.
> 
> Last but not least, if there is enough consent to accept this feature, I
> will start writing up some documentation.
> 
> Bootstrapped and regtested on Arm, Intel, POWER, RISCV, S/390.  I have
> only verified the 32-bit tests via cross compilers and didn't execute
> them in contrast to 64-bit targets.
> ---
>   gcc/cfgexpand.cc                              |  42 -----
>   gcc/genoutput.cc                              |  12 ++
>   gcc/genpreds.cc                               |   4 +-
>   gcc/gimplify.cc                               | 134 ++++++++++++++-
>   gcc/lra-constraints.cc                        |  13 ++
>   gcc/recog.cc                                  |  11 +-
>   gcc/stmt.cc                                   | 155 +++++++++++++++++-
>   gcc/stmt.h                                    |  12 +-
>   gcc/testsuite/gcc.dg/asm-hard-reg-1.c         |  85 ++++++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-2.c         |  33 ++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-3.c         |  25 +++
>   gcc/testsuite/gcc.dg/asm-hard-reg-4.c         |  50 ++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-5.c         |  36 ++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-6.c         |  60 +++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-7.c         |  70 ++++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c   |  67 ++++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c   |  19 +++
>   gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c   |  20 +++
>   gcc/testsuite/gcc.dg/pr87600-2.c              |  30 +---
>   gcc/testsuite/gcc.dg/pr87600-3.c              |  35 ++++
>   .../gcc.target/s390/asm-hard-reg-1.c          | 103 ++++++++++++
>   .../gcc.target/s390/asm-hard-reg-2.c          |  43 +++++
>   .../gcc.target/s390/asm-hard-reg-3.c          |  42 +++++
>   gcc/testsuite/lib/scanasm.exp                 |   4 +
>   24 files changed, 1020 insertions(+), 85 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-1.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-2.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-3.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-4.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-5.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-6.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-7.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
>   create mode 100644 gcc/testsuite/gcc.dg/pr87600-3.c
>   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
>   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
>   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> 
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index dad3ae1b7c6..8bdc530626c 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -2966,44 +2966,6 @@ expand_asm_loc (tree string, int vol, location_t locus)
>     emit_insn (body);
>   }
>   
> -/* Return the number of times character C occurs in string S.  */
> -static int
> -n_occurrences (int c, const char *s)
> -{
> -  int n = 0;
> -  while (*s)
> -    n += (*s++ == c);
> -  return n;
> -}
> -
> -/* A subroutine of expand_asm_operands.  Check that all operands have
> -   the same number of alternatives.  Return true if so.  */
> -
> -static bool
> -check_operand_nalternatives (const vec<const char *> &constraints)
> -{
> -  unsigned len = constraints.length();
> -  if (len > 0)
> -    {
> -      int nalternatives = n_occurrences (',', constraints[0]);
> -
> -      if (nalternatives + 1 > MAX_RECOG_ALTERNATIVES)
> -	{
> -	  error ("too many alternatives in %<asm%>");
> -	  return false;
> -	}
> -
> -      for (unsigned i = 1; i < len; ++i)
> -	if (n_occurrences (',', constraints[i]) != nalternatives)
> -	  {
> -	    error ("operand constraints for %<asm%> differ "
> -		   "in number of alternatives");
> -	    return false;
> -	  }
> -    }
> -  return true;
> -}
> -
>   /* Check for overlap between registers marked in CLOBBERED_REGS and
>      anything inappropriate in T.  Emit error and return the register
>      variable definition for error, NULL_TREE for ok.  */
> @@ -3169,10 +3131,6 @@ expand_asm_stmt (gasm *stmt)
>   	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
>       }
>   
> -  /* ??? Diagnose during gimplification?  */
> -  if (! check_operand_nalternatives (constraints))
> -    return;
> -
>     /* Count the number of meaningful clobbered registers, ignoring what
>        we would ignore later.  */
>     auto_vec<rtx> clobber_rvec;
> diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
> index efd81766bb5..c1efb043579 100644
> --- a/gcc/genoutput.cc
> +++ b/gcc/genoutput.cc
> @@ -1219,6 +1219,18 @@ mdep_constraint_len (const char *s, file_location loc, int opno)
>         if (!strncmp (s, p->name, p->namelen))
>   	return p->namelen;
>   
> +  if (*s == '{')
> +    {
> +      const char *end = s + 1;
> +      while (*end != '}' && *end != '"' && *end != '\0')
> +	++end;
> +      /* Similarly as in parse_constraint_regname(), consider any hard register
> +	 name longer than a few characters as an error.  */
> +      ptrdiff_t len = end - s;
> +      if (*end == '}' && len > 1 && len < 31)
> +	return len + 1;
> +    }
> +
>     error_at (loc, "error: undefined machine-specific constraint "
>   	    "at this point: \"%s\"", s);
>     message_at (loc, "note:  in operand %d", opno);
> diff --git a/gcc/genpreds.cc b/gcc/genpreds.cc
> index 55d149e8a40..0777cb7a4db 100644
> --- a/gcc/genpreds.cc
> +++ b/gcc/genpreds.cc
> @@ -1148,7 +1148,7 @@ write_insn_constraint_len (void)
>     unsigned int i;
>   
>     puts ("static inline size_t\n"
> -	"insn_constraint_len (char fc, const char *str ATTRIBUTE_UNUSED)\n"
> +	"insn_constraint_len (char fc, const char *str)\n"
>   	"{\n"
>   	"  switch (fc)\n"
>   	"    {");
> @@ -1181,6 +1181,8 @@ write_insn_constraint_len (void)
>   
>     puts ("    default: break;\n"
>   	"    }\n"
> +	"  if (str[0] == '{')\n"
> +	"      return ((const char *) rawmemchr (str + 1, '}') - str) + 1;\n"
>   	"  return 1;\n"
>   	"}\n");
>   }
> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index ab323d764e8..abb705d49ac 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -70,6 +70,9 @@ along with GCC; see the file COPYING3.  If not see
>   #include "omp-offload.h"
>   #include "context.h"
>   #include "tree-nested.h"
> +#include "insn-config.h"
> +#include "recog.h"
> +#include "output.h"
>   
>   /* Identifier for a basic condition, mapping it to other basic conditions of
>      its Boolean expression.  Basic conditions given the same uid (in the same
> @@ -6993,6 +6996,42 @@ gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>     return ret;
>   }
>   
> +/* Return the number of times character C occurs in string S.  */
> +
> +static int
> +num_occurrences (int c, const char *s)
> +{
> +  int n = 0;
> +  while (*s)
> +    n += (*s++ == c);
> +  return n;
> +}
> +
> +/* A subroutine of gimplify_asm_expr.  Check that all operands have
> +   the same number of alternatives.  Return -1 if this is violated.  Otherwise
> +   return the number of alternatives.  */
> +
> +static int
> +num_alternatives (const_tree link)
> +{
> +  if (link == nullptr)
> +    return 0;
> +
> +  const char *constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> +  int num = num_occurrences (',', constraint);
> +
> +  if (num + 1 > MAX_RECOG_ALTERNATIVES)
> +    return -1;
> +
> +  for (link = TREE_CHAIN (link); link; link = TREE_CHAIN (link))
> +    {
> +      constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> +      if (num_occurrences (',', constraint) != num)
> +	return -1;
> +    }
> +  return num + 1;
> +}
> +
>   /* Gimplify the operands of an ASM_EXPR.  Input operands should be a gimple
>      value; output operands should be a gimple lvalue.  */
>   
> @@ -7023,6 +7062,36 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>     clobbers = NULL;
>     labels = NULL;
>   
> +  int num_alternatives_out = num_alternatives (ASM_OUTPUTS (expr));
> +  int num_alternatives_in = num_alternatives (ASM_INPUTS (expr));
> +  if (num_alternatives_out == -1 || num_alternatives_in == -1
> +      || (num_alternatives_out > 0 && num_alternatives_in > 0
> +	  && num_alternatives_out != num_alternatives_in))
> +    {
> +      error ("operand constraints for %<asm%> differ "
> +	     "in number of alternatives");
> +      return GS_ERROR;
> +    }
> +  int num_alternatives = MAX (num_alternatives_out, num_alternatives_in);
> +
> +  /* Regarding hard register constraints ensure that each hard register is used
> +     at most once over all inputs/outputs and each alternative.  Keep track in
> +     hardregs[0] which hard register is used via an asm register over all
> +     inputs/outputs.  hardregs[i] for i >= 2 describes which hard registers are
> +     used for alternative i-2 over all inputs/outputs.  hardregs[1] is a
> +     reduction of all alternatives, i.e., hardregs[1] |= hardregs[i] for i >= 2
> +     and describes whether a hard register is used in any alternative.  This is
> +     just a shortcut instead of recomputing the union over all alternatives;
> +     possibly multiple times.  */
> +  auto_vec<HARD_REG_SET> hardregs (num_alternatives + 2);
> +  std::pair <vec <HARD_REG_SET> *, machine_mode> hardreg_props = {&hardregs, VOIDmode};
> +  for (int i = 0; i < num_alternatives + 2; ++i)
> +    {
> +      HARD_REG_SET hregset;
> +      CLEAR_HARD_REG_SET (hregset);
> +      hardregs.quick_push (hregset);
> +    }
> +
>     ret = GS_ALL_DONE;
>     link_next = NULL_TREE;
>     for (i = 0, link = ASM_OUTPUTS (expr); link; ++i, link = link_next)
> @@ -7039,8 +7108,13 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>         if (constraint_len == 0)
>           continue;
>   
> -      ok = parse_output_constraint (&constraint, i, 0, 0,
> -				    &allows_mem, &allows_reg, &is_inout);
> +      tree outtype = TREE_TYPE (TREE_VALUE (link));
> +      auto hardreg_props_p
> +	= outtype != error_mark_node
> +	? (hardreg_props.second = TYPE_MODE (outtype), &hardreg_props)
> +	: nullptr;
> +      ok = parse_output_constraint (&constraint, i, 0, 0, &allows_mem,
> +				    &allows_reg, &is_inout, hardreg_props_p);
>         if (!ok)
>   	{
>   	  ret = GS_ERROR;
> @@ -7049,7 +7123,6 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   
>         /* If we can't make copies, we can only accept memory.
>   	 Similarly for VLAs.  */
> -      tree outtype = TREE_TYPE (TREE_VALUE (link));
>         if (outtype != error_mark_node
>   	  && (TREE_ADDRESSABLE (outtype)
>   	      || !COMPLETE_TYPE_P (outtype)
> @@ -7111,6 +7184,24 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   	      TREE_VALUE (link) = tem;
>   	      tret = GS_OK;
>   	    }
> +	  if (VAR_P (op) && DECL_HARD_REGISTER (op))
> +	    {
> +	      tree id = DECL_ASSEMBLER_NAME (op);
> +	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
> +	      int hardreg = decode_reg_name (asmspec);
> +	      if (hardreg >= 0)
> +		{
> +		  if (TEST_HARD_REG_BIT (hardregs[0], hardreg)
> +		      || TEST_HARD_REG_BIT (hardregs[1], hardreg))
> +		    {
> +		      error ("multiple outputs to hard register: %s",
> +			     reg_names[hardreg]);
> +		      return GS_ERROR;
> +		    }
> +		  else
> +		    SET_HARD_REG_BIT (hardregs[0], hardreg);
> +		}
> +	    }
>   	}
>   
>         vec_safe_push (outputs, link);
> @@ -7210,16 +7301,29 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   	}
>       }
>   
> +  for (unsigned int i = 0; i < hardregs.length (); ++i)
> +    CLEAR_HARD_REG_SET (hardregs[i]);
> +
>     link_next = NULL_TREE;
>     for (link = ASM_INPUTS (expr); link; ++i, link = link_next)
>       {
>         link_next = TREE_CHAIN (link);
>         constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> -      parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> -			      oconstraints, &allows_mem, &allows_reg);
> +      tree intype = TREE_TYPE (TREE_VALUE (link));
> +      auto hardreg_props_p
> +	= intype != error_mark_node
> +	? (hardreg_props.second = TYPE_MODE (intype), &hardreg_props)
> +	: nullptr;
> +      bool ok = parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> +					oconstraints, &allows_mem, &allows_reg,
> +					hardreg_props_p);
> +      if (!ok)
> +	{
> +	  ret = GS_ERROR;
> +	  is_inout = false;
> +	}
>   
>         /* If we can't make copies, we can only accept memory.  */
> -      tree intype = TREE_TYPE (TREE_VALUE (link));
>         if (intype != error_mark_node
>   	  && (TREE_ADDRESSABLE (intype)
>   	      || !COMPLETE_TYPE_P (intype)
> @@ -7290,6 +7394,24 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   				is_gimple_asm_val, fb_rvalue);
>   	  if (tret == GS_ERROR)
>   	    ret = tret;
> +	  tree inputv = TREE_VALUE (link);
> +	  if (VAR_P (inputv) && DECL_HARD_REGISTER (inputv))
> +	    {
> +	      tree id = DECL_ASSEMBLER_NAME (inputv);
> +	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
> +	      int hardreg = decode_reg_name (asmspec);
> +	      if (hardreg >= 0)
> +		{
> +		  if (TEST_HARD_REG_BIT (hardregs[1], hardreg))
> +		    {
> +		      error ("multiple inputs to hard register: %s",
> +			     reg_names[hardreg]);
> +		      return GS_ERROR;
> +		    }
> +		  else
> +		    SET_HARD_REG_BIT (hardregs[0], hardreg);
> +		}
> +	    }
>   	}
>   
>         TREE_CHAIN (link) = NULL_TREE;
> diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
> index 92b343fa99a..632c75ef01c 100644
> --- a/gcc/lra-constraints.cc
> +++ b/gcc/lra-constraints.cc
> @@ -114,6 +114,7 @@
>   #include "target.h"
>   #include "rtl.h"
>   #include "tree.h"
> +#include "stmt.h"
>   #include "predict.h"
>   #include "df.h"
>   #include "memmodel.h"
> @@ -2165,6 +2166,7 @@ process_alt_operands (int only_alternative)
>     bool costly_p;
>     enum reg_class cl;
>     const HARD_REG_SET *cl_filter;
> +  HARD_REG_SET hregset;
>   
>     /* Calculate some data common for all alternatives to speed up the
>        function.	*/
> @@ -2536,6 +2538,17 @@ process_alt_operands (int only_alternative)
>   		  cl_filter = nullptr;
>   		  goto reg;
>   
> +		case '{':
> +		    {
> +		      int regno = decode_hreg_constraint (p);
> +		      gcc_assert (regno >= 0);
> +		      cl = REGNO_REG_CLASS (regno);
> +		      CLEAR_HARD_REG_SET (hregset);
> +		      SET_HARD_REG_BIT (hregset, regno);
> +		      cl_filter = &hregset;
> +		      goto reg;
> +		    }
> +
>   		default:
>   		  cn = lookup_constraint (p);
>   		  switch (get_constraint_type (cn))
> diff --git a/gcc/recog.cc b/gcc/recog.cc
> index 54b317126c2..b604029d5f1 100644
> --- a/gcc/recog.cc
> +++ b/gcc/recog.cc
> @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "target.h"
>   #include "rtl.h"
>   #include "tree.h"
> +#include "stmt.h"
>   #include "cfghooks.h"
>   #include "df.h"
>   #include "memmodel.h"
> @@ -2333,7 +2334,8 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints)
>   	    {
>   	    case CT_REGISTER:
>   	      if (!result
> -		  && reg_class_for_constraint (cn) != NO_REGS
> +		  && (reg_class_for_constraint (cn) != NO_REGS
> +		      || constraint[0] == '{')
>   		  && GET_MODE (op) != BLKmode
>   		  && register_operand (op, VOIDmode))
>   		result = 1;
> @@ -3267,6 +3269,13 @@ constrain_operands (int strict, alternative_mask alternatives)
>   		  win = true;
>   		break;
>   
> +	      case '{':
> +		if ((REG_P (op) && HARD_REGISTER_P (op)
> +		     && (int) REGNO (op) == decode_hreg_constraint (p))
> +		    || !reload_completed)
> +		  win = true;
> +		break;
> +
>   	      default:
>   		{
>   		  enum constraint_num cn = lookup_constraint (p);
> diff --git a/gcc/stmt.cc b/gcc/stmt.cc
> index ae1527f0a19..7b073f8ce85 100644
> --- a/gcc/stmt.cc
> +++ b/gcc/stmt.cc
> @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "emit-rtl.h"
>   #include "pretty-print.h"
>   #include "diagnostic-core.h"
> +#include "output.h"
>   
>   #include "fold-const.h"
>   #include "varasm.h"
> @@ -174,6 +175,77 @@ expand_label (tree label)
>       maybe_set_first_label_num (label_r);
>   }
>   
> +/* Parse a hard register constraint and return its number or -1 in case of an
> +   error.  BEGIN should point to a string of the form "{regname}".  For the
> +   sake of simplicity assume that a register name is not longer than 31
> +   characters, if not error out.  */
> +
> +int
> +decode_hreg_constraint (const char *begin)
> +{
> +  if (*begin != '{')
> +    return -1;
> +  ++begin;
> +  const char *end = begin;
> +  while (*end != '}' && *end != '\0')
> +    ++end;
> +  if (*end != '}' || end == begin)
> +    return -1;
> +  ptrdiff_t len = end - begin;
> +  if (len >= 31)
> +    return -1;
> +  char regname[32];
> +  memcpy (regname, begin, len);
> +  regname[len] = '\0';
> +  int regno = decode_reg_name (regname);
> +  return regno;
> +}
> +
> +static bool
> +eliminable_regno_p (int regnum)
> +{
> +  static const struct
> +  {
> +    const int from;
> +    const int to;
> +  } eliminables[] = ELIMINABLE_REGS;
> +  for (size_t i = 0; i < ARRAY_SIZE (eliminables); i++)
> +    if (regnum == eliminables[i].from)
> +      return true;
> +  return false;
> +}
> +
> +/* Perform a similar check as done in make_decl_rtl().  */
> +
> +static bool
> +hardreg_ok_p (int reg_number, machine_mode mode, int operand_num)
> +{
> +  if (mode == BLKmode)
> +    error ("data type isn%'t suitable for register %s of operand %i",
> +	   reg_names[reg_number], operand_num);
> +  else if (!in_hard_reg_set_p (accessible_reg_set, mode, reg_number))
> +    error ("register %s for operand %i cannot be accessed"
> +	   " by the current target", reg_names[reg_number], operand_num);
> +  else if (!in_hard_reg_set_p (operand_reg_set, mode, reg_number))
> +    error ("register %s for operand %i is not general enough"
> +	   " to be used as a register variable", reg_names[reg_number], operand_num);
> +  else if (!targetm.hard_regno_mode_ok (reg_number, mode))
> +    error ("register %s for operand %i isn%'t suitable for data type",
> +	   reg_names[reg_number], operand_num);
> +  else if (reg_number != HARD_FRAME_POINTER_REGNUM
> +	   && (reg_number == FRAME_POINTER_REGNUM
> +#ifdef RETURN_ADDRESS_POINTER_REGNUM
> +	       || reg_number == RETURN_ADDRESS_POINTER_REGNUM
> +#endif
> +	       || reg_number == ARG_POINTER_REGNUM)
> +	   && eliminable_regno_p (reg_number))
> +    error ("register for operand %i is an internal GCC "
> +	   "implementation detail", operand_num);
> +  else
> +    return true;
> +  return false;
> +}
> +
>   /* Parse the output constraint pointed to by *CONSTRAINT_P.  It is the
>      OPERAND_NUMth output operand, indexed from zero.  There are NINPUTS
>      inputs and NOUTPUTS outputs to this extended-asm.  Upon return,
> @@ -190,7 +262,9 @@ expand_label (tree label)
>   bool
>   parse_output_constraint (const char **constraint_p, int operand_num,
>   			 int ninputs, int noutputs, bool *allows_mem,
> -			 bool *allows_reg, bool *is_inout)
> +			 bool *allows_reg, bool *is_inout,
> +			 const std::pair <vec <HARD_REG_SET> *, machine_mode>
> +			 *hardreg_props)
>   {
>     const char *constraint = *constraint_p;
>     const char *p;
> @@ -244,6 +318,8 @@ parse_output_constraint (const char **constraint_p, int operand_num,
>         constraint = *constraint_p;
>       }
>   
> +  unsigned int alternative = 2;
> +
>     /* Loop through the constraint string.  */
>     for (p = constraint + 1; *p; )
>       {
> @@ -268,7 +344,11 @@ parse_output_constraint (const char **constraint_p, int operand_num,
>   	case 'E':  case 'F':  case 'G':  case 'H':
>   	case 's':  case 'i':  case 'n':
>   	case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
> -	case 'N':  case 'O':  case 'P':  case ',':
> +	case 'N':  case 'O':  case 'P':
> +	  break;
> +
> +	case ',':
> +	  ++alternative;
>   	  break;
>   
>   	case '0':  case '1':  case '2':  case '3':  case '4':
> @@ -289,6 +369,36 @@ parse_output_constraint (const char **constraint_p, int operand_num,
>   	  *allows_mem = true;
>   	  break;
>   
> +	case '{':
> +	  {
> +	    int regno = decode_hreg_constraint (p);
> +	    if (regno < 0)
> +	      {
> +		error ("invalid output constraint: %s", p);
> +		return false;
> +	      }
> +	    if (hardreg_props)
> +	      {
> +		vec<HARD_REG_SET> *hardregs = hardreg_props->first;
> +		if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
> +		    || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
> +		  {
> +		    error ("multiple outputs to hard register: %s",
> +			   reg_names[regno]);
> +		    return false;
> +		  }
> +		else
> +		  {
> +		    SET_HARD_REG_BIT ((*hardregs)[1], regno);
> +		    SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
> +		  }
> +		if (!hardreg_ok_p (regno, hardreg_props->second, operand_num))
> +		  return false;
> +	      }
> +	    *allows_reg = true;
> +	    break;
> +	  }
> +
>   	default:
>   	  if (!ISALPHA (*p))
>   	    break;
> @@ -317,7 +427,9 @@ bool
>   parse_input_constraint (const char **constraint_p, int input_num,
>   			int ninputs, int noutputs, int ninout,
>   			const char * const * constraints,
> -			bool *allows_mem, bool *allows_reg)
> +			bool *allows_mem, bool *allows_reg,
> +			const std::pair <vec<HARD_REG_SET> *, machine_mode>
> +			*hardreg_props)
>   {
>     const char *constraint = *constraint_p;
>     const char *orig_constraint = constraint;
> @@ -332,6 +444,8 @@ parse_input_constraint (const char **constraint_p, int input_num,
>   
>     /* Make sure constraint has neither `=', `+', nor '&'.  */
>   
> +  unsigned int alternative = 2;
> +
>     for (j = 0; j < c_len; j += CONSTRAINT_LEN (constraint[j], constraint+j))
>       switch (constraint[j])
>         {
> @@ -358,7 +472,11 @@ parse_input_constraint (const char **constraint_p, int input_num,
>         case 'E':  case 'F':  case 'G':  case 'H':
>         case 's':  case 'i':  case 'n':
>         case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
> -      case 'N':  case 'O':  case 'P':  case ',':
> +      case 'N':  case 'O':  case 'P':
> +	break;
> +
> +      case ',':
> +	++alternative;
>   	break;
>   
>   	/* Whether or not a numeric constraint allows a register is
> @@ -408,6 +526,35 @@ parse_input_constraint (const char **constraint_p, int input_num,
>   	*allows_mem = true;
>   	break;
>   
> +      case '{':
> +	{
> +	  int regno = decode_hreg_constraint (constraint + j);
> +	  if (regno < 0)
> +	    {
> +	      error ("invalid input constraint: %s", constraint + j);
> +	      return false;
> +	    }
> +	  if (hardreg_props)
> +	    {
> +	      vec <HARD_REG_SET> *hardregs = hardreg_props->first;
> +	      if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
> +		  || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
> +		{
> +		  error ("multiple inputs to hard register: %s",
> +			    reg_names[regno]);
> +		}
> +	      else
> +		{
> +		  SET_HARD_REG_BIT ((*hardregs)[1], regno);
> +		  SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
> +		}
> +	      if (!hardreg_ok_p (regno, hardreg_props->second, input_num))
> +		return false;
> +	    }
> +	  *allows_reg = true;
> +	  break;
> +	}
> +
>         default:
>   	if (! ISALPHA (constraint[j]))
>   	  {
> diff --git a/gcc/stmt.h b/gcc/stmt.h
> index a2caae7121b..a380ecd8cbf 100644
> --- a/gcc/stmt.h
> +++ b/gcc/stmt.h
> @@ -20,11 +20,19 @@ along with GCC; see the file COPYING3.  If not see
>   #ifndef GCC_STMT_H
>   #define GCC_STMT_H
>   
> +#include "target.h"
> +#include "hard-reg-set.h"
> +
>   extern void expand_label (tree);
>   extern bool parse_output_constraint (const char **, int, int, int,
> -				     bool *, bool *, bool *);
> +				     bool *, bool *, bool *,
> +				     const std::pair <vec <HARD_REG_SET> *,
> +						      machine_mode> * = nullptr);
>   extern bool parse_input_constraint (const char **, int, int, int, int,
> -				    const char * const *, bool *, bool *);
> +				    const char * const *, bool *, bool *,
> +				    const std::pair <vec <HARD_REG_SET> *,
> +						     machine_mode> * = nullptr);
> +extern int decode_hreg_constraint (const char *);
>   extern tree resolve_asm_operand_names (tree, tree, tree, tree);
>   #ifdef HARD_CONST
>   /* Silly ifdef to avoid having all includers depend on hard-reg-set.h.  */
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> new file mode 100644
> index 00000000000..6a5a9ada45f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> @@ -0,0 +1,85 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +
> +#if defined (__aarch64__)
> +# define GPR "{x4}"
> +/* { dg-final { scan-assembler-times "foo\tx4" 8 { target { aarch64*-*-* } } } } */
> +#elif defined (__arm__)
> +# define GPR "{r4}"
> +/* { dg-final { scan-assembler-times "foo\tr4" 8 { target { arm*-*-* } } } } */
> +#elif defined (__i386__)
> +# define GPR "{ecx}"
> +/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { i?86-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { i?86-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%ecx" 4 { target { i?86-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR "{r5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 8 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define GPR "{t5}"
> +/* { dg-final { scan-assembler-times "foo\tt5" 8 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +/* { dg-final { scan-assembler-times "foo\t%r4" 8 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define GPR "{rcx}"
> +/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%ecx" 2 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
> +#endif
> +
> +char
> +test_char (char x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +char
> +test_char_from_mem (char *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> +
> +short
> +test_short (short x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +short
> +test_short_from_mem (short *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> +
> +int
> +test_int (int x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +int
> +test_int_from_mem (int *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> +
> +long
> +test_long (long x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +long
> +test_long_from_mem (long *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> new file mode 100644
> index 00000000000..7dabf9657cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-std=c99" } we need long long */
> +
> +#if defined (__aarch64__)
> +# define GPR "{x4}"
> +/* { dg-final { scan-assembler-times "foo\tx4" 2 { target { aarch64*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR "{r5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
> +#elif defined (__riscv)
> +# define GPR "{t5}"
> +/* { dg-final { scan-assembler-times "foo\tt5" 2 { target { riscv64-*-* } } } } */
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +/* { dg-final { scan-assembler-times "foo\t%r4" 2 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define GPR "{rcx}"
> +/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
> +#endif
> +
> +long long
> +test_longlong (long long x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +long long
> +test_longlong_from_mem (long long *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> new file mode 100644
> index 00000000000..fa4472ae8a8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target { { aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } && int128 } } } */
> +/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
> +
> +#if defined (__aarch64__)
> +# define REG "{x4}"
> +/* { dg-final { scan-assembler-times "foo\tx4" 1 { target { aarch64*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define REG "{r5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 1 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define REG "{t5}"
> +/* { dg-final { scan-assembler-times "foo\tt5" 1 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define REG "{r4}"
> +/* { dg-final { scan-assembler-times "foo\t%r4" 1 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define REG "{xmm0}"
> +/* { dg-final { scan-assembler-times "foo\t%xmm0" 1 { target { x86_64-*-* } } } } */
> +#endif
> +
> +void
> +test (void)
> +{
> +  __asm__ ("foo\t%0" :: REG ((__int128) 42));
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-4.c b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> new file mode 100644
> index 00000000000..0816df8f719
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +
> +#if defined (__aarch64__)
> +# define FPR "{d5}"
> +/* { dg-final { scan-assembler-times "foo\tv5" 4 { target { aarch64*-*-* } } } } */
> +#elif defined (__arm__)
> +# define FPR "{d5}"
> +/* { dg-additional-options "-march=armv7-a+fp -mfloat-abi=hard" { target arm*-*-* } } */
> +/* { dg-final { scan-assembler-times "foo\ts10" 4 { target { arm*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define FPR "{5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 4 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define FPR "{f5}"
> +/* { dg-final { scan-assembler-times "foo\tf5" 4 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define FPR "{f5}"
> +/* { dg-final { scan-assembler-times "foo\t%f5" 4 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define FPR "{xmm5}"
> +/* { dg-final { scan-assembler-times "foo\t%xmm5" 4 { target { x86_64-*-* } } } } */
> +#endif
> +
> +float
> +test_float (float x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (x));
> +  return x;
> +}
> +
> +float
> +test_float_from_mem (float *x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (*x));
> +  return *x;
> +}
> +
> +double
> +test_double (double x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (x));
> +  return x;
> +}
> +
> +double
> +test_double_from_mem (double *x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-5.c b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> new file mode 100644
> index 00000000000..a9e25ce1746
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> +
> +typedef int V __attribute__ ((vector_size (4 * sizeof (int))));
> +
> +#if defined (__aarch64__)
> +# define VR "{v20}"
> +/* { dg-final { scan-assembler-times "foo\tv20" 2 { target { aarch64*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define VR "{v5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
> +#elif defined (__riscv)
> +# define VR "{v5}"
> +/* { dg-additional-options "-march=rv64imv" { target riscv64-*-* } } */
> +/* { dg-final { scan-assembler-times "foo\tv5" 2 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define VR "{v5}"
> +/* { dg-require-effective-target s390_mvx { target s390*-*-* } } */
> +/* { dg-final { scan-assembler-times "foo\t%v5" 2 { target s390*-*-* } } } */
> +#elif defined (__x86_64__)
> +# define VR "{xmm9}"
> +/* { dg-final { scan-assembler-times "foo\t%xmm9" 2 { target { x86_64-*-* } } } } */
> +#endif
> +
> +V
> +test (V x)
> +{
> +  __asm__ ("foo\t%0" : "+"VR (x));
> +  return x;
> +}
> +
> +V
> +test_from_mem (V *x)
> +{
> +  __asm__ ("foo\t%0" : "+"VR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-6.c b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> new file mode 100644
> index 00000000000..d9b7fae8097
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-O2" } */
> +
> +/* Test multiple alternatives.  */
> +
> +#if defined (__aarch64__)
> +# define GPR1 "{x1}"
> +# define GPR2 "{x2}"
> +# define GPR3 "{x3}"
> +/* { dg-final { scan-assembler-times "foo\tx1,x3" 1 { target { aarch64*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\tx2,\\\[x1\\\]" 1 { target { aarch64*-*-* } } } } */
> +#elif defined (__arm__)
> +# define GPR1 "{r1}"
> +# define GPR2 "{r2}"
> +# define GPR3 "{r3}"
> +/* { dg-final { scan-assembler-times "foo\tr1,r3" 1 { target { arm*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\tr2,\\\[r1\\\]" 1 { target { arm*-*-* } } } } */
> +#elif defined (__i386__)
> +# define GPR1 "{eax}"
> +# define GPR2 "{ebx}"
> +# define GPR3 "{ecx}"
> +/* { dg-final { scan-assembler-times "foo\t4\\(%esp\\),%ecx" 1 { target { i?86-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%eax\\)" 1 { target { i?86-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR1 "{r4}"
> +# define GPR2 "{r5}"
> +# define GPR3 "{r6}"
> +/* { dg-final { scan-assembler-times "foo\t4,6" 1 { target { powerpc*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t5,0\\(4\\)" 1 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define GPR1 "{t1}"
> +# define GPR2 "{t2}"
> +# define GPR3 "{t3}"
> +/* { dg-final { scan-assembler-times "foo\tt1,t3" 1 { target { riscv*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\tt2,0\\(a1\\)" 1 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define GPR1 "{r0}"
> +# define GPR2 "{r1}"
> +# define GPR3 "{r2}"
> +/* { dg-final { scan-assembler-times "foo\t%r0,%r2" 1 { target { s390*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t%r1,0\\(%r3\\)" 1 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define GPR1 "{eax}"
> +# define GPR2 "{ebx}"
> +# define GPR3 "{rcx}"
> +/* { dg-final { scan-assembler-times "foo\t%eax,%rcx" 1 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%rsi\\)" 1 { target { x86_64-*-* } } } } */
> +#endif
> +
> +void
> +test_reg_reg (int x, long long *y)
> +{
> +  __asm__ ("foo\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (y));
> +}
> +
> +void
> +test_reg_mem (int x, long long *y)
> +{
> +  __asm__ ("bar\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (*y));
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-7.c b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> new file mode 100644
> index 00000000000..39c4497ecaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> @@ -0,0 +1,70 @@
> +/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-O2" } */
> +
> +/* Pass parameter x in the first general argument register to the assembler
> +   instruction.
> +
> +   In function bar we fail to do so because after the function call to foo,
> +   variable argreg1 does not contain the value of x but rather 42 which got
> +   passed to foo.  Thus, the function always returns 42.  In contrast in
> +   function baz, variable x is saved over the function call and materializes in
> +   the asm statement and therefore is returned.  */
> +
> +#if defined (__aarch64__)
> +# define REG register int argreg1 __asm__ ("x0") = x;
> +# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define REG register int argreg1 __asm__ ("r3") = x;
> +# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
> +#elif defined (__riscv)
> +# define REG register int argreg1 __asm__ ("a0") = x;
> +# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
> +#elif defined (__s390__)
> +# define REG register int argreg1 __asm__ ("r2") = x;
> +# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
> +#elif defined (__x86_64__)
> +# define REG register int argreg1 __asm__ ("edi") = x;
> +# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
> +#endif
> +
> +__attribute__ ((noipa))
> +int foo (int unused) { }
> +
> +int
> +bar (int x)
> +{
> +  int out;
> +  REG
> +  foo (42);
> +  MOVE1
> +  return out;
> +}
> +
> +int
> +baz (int x)
> +{
> +  int out;
> +  foo (42);
> +  MOVE2
> +  return out;
> +}
> +
> +int
> +main (void)
> +{
> +  if (bar (0) != 42
> +      || bar (1) != 42
> +      || bar (2) != 42
> +      || bar (32) != 42
> +      || baz (0) != 0
> +      || baz (1) != 1
> +      || baz (2) != 2
> +      || baz (32) != 32)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> new file mode 100644
> index 00000000000..6060c0946da
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> @@ -0,0 +1,67 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +
> +#if defined (__aarch64__)
> +# define GPR1_RAW "x0"
> +# define GPR2 "{x1}"
> +# define GPR3 "{x2}"
> +# define INVALID_GPR_A "{x31}"
> +#elif defined (__arm__)
> +# define GPR1_RAW "r0"
> +# define GPR2 "{r1}"
> +# define GPR3 "{r2}"
> +# define INVALID_GPR_A "{r16}"
> +#elif defined (__i386__)
> +# define GPR1_RAW "%eax"
> +# define GPR2 "{%ebx}"
> +# define GPR3 "{%edx}"
> +# define INVALID_GPR_A "{%eex}"
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR1_RAW "r4"
> +# define GPR2 "{r5}"
> +# define GPR3 "{r6}"
> +# define INVALID_GPR_A "{r33}"
> +#elif defined (__riscv)
> +# define GPR1_RAW "t4"
> +# define GPR2 "{t5}"
> +# define GPR3 "{t6}"
> +# define INVALID_GPR_A "{t7}"
> +#elif defined (__s390__)
> +# define GPR1_RAW "r4"
> +# define GPR2 "{r5}"
> +# define GPR3 "{r6}"
> +# define INVALID_GPR_A "{r17}"
> +#elif defined (__x86_64__)
> +# define GPR1_RAW "rax"
> +# define GPR2 "{rbx}"
> +# define GPR3 "{rcx}"
> +# define INVALID_GPR_A "{rex}"
> +#endif
> +
> +#define GPR1 "{"GPR1_RAW"}"
> +#define INVALID_GPR_B "{"GPR1_RAW
> +
> +struct { int a[128]; } s = {0};
> +
> +void
> +test (void)
> +{
> +  int x, y;
> +  register int gpr1 __asm__ (GPR1_RAW) = 0;
> +
> +  __asm__ ("" :: "{}" (42)); /* { dg-error "invalid input constraint: \{\}" } */
> +  __asm__ ("" :: INVALID_GPR_A (42)); /* { dg-error "invalid input constraint" } */
> +  __asm__ ("" :: INVALID_GPR_B (42)); /* { dg-error "invalid input constraint" } */
> +
> +  __asm__ ("" :: GPR1 (s)); /* { dg-error "data type isn't suitable for register .* of operand 0" } */
> +
> +  __asm__ ("" :: "r" (gpr1), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1 (42), "r" (gpr1)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1 (42), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1","GPR2 (42), GPR2","GPR3 (42));
> +  __asm__ ("" :: GPR1","GPR2 (42), GPR3","GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1","GPR2 (42), GPR1","GPR3 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1 GPR2 (42), GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" : "+"GPR1 (x), "="GPR1 (y)); /* { dg-error "multiple outputs to hard register" } */
> +  __asm__ ("" : "="GPR1 (y) : GPR1 (42), "0" (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" : "+"GPR1 (x) : GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> new file mode 100644
> index 00000000000..efa843e0800
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target { { aarch64*-*-* s390x-*-* } && int128 } } } */
> +/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
> +
> +/* Test register pairs.  */
> +
> +#if defined (__aarch64__)
> +# define GPR "{x4}"
> +# define INVALID_GPR "{x5}"
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +# define INVALID_GPR "{r5}"
> +#endif
> +
> +void
> +test (void)
> +{
> +  __asm__ ("" :: GPR ((__int128) 42));
> +  __asm__ ("" :: INVALID_GPR ((__int128) 42)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> new file mode 100644
> index 00000000000..ef8af5a6d52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile { target arm*-*-* s390-*-* } } */
> +/* { dg-options "-std=c99" } we need long long */
> +/* { dg-additional-options "-march=armv8-a" { target arm*-*-* } } */
> +
> +/* Test register pairs.  */
> +
> +#if defined (__arm__)
> +# define GPR "{r4}"
> +# define INVALID_GPR "{r5}"
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +# define INVALID_GPR "{r5}"
> +#endif
> +
> +void
> +test (void)
> +{
> +  __asm__ ("" :: GPR (42ll));
> +  __asm__ ("" :: INVALID_GPR (42ll)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/pr87600-2.c b/gcc/testsuite/gcc.dg/pr87600-2.c
> index e8a9f194b73..860d3f965ef 100644
> --- a/gcc/testsuite/gcc.dg/pr87600-2.c
> +++ b/gcc/testsuite/gcc.dg/pr87600-2.c
> @@ -11,34 +11,6 @@ test0 (void)
>   {
>     register long var1 asm (REG1);
>     register long var2 asm (REG1);
> -  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "invalid hard register usage between output operands" } */
> +  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "multiple outputs to hard register" } */
>     return var1;
>   }
> -
> -long
> -test1 (void)
> -{
> -  register long var1 asm (REG1);
> -  register long var2 asm (REG2);
> -  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
> -  return var1;
> -}
> -
> -long
> -test2 (void)
> -{
> -  register long var1 asm (REG1);
> -  register long var2 asm (REG1);
> -  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> -  return var1;
> -}
> -
> -long
> -test3 (void)
> -{
> -  register long var1 asm (REG1);
> -  register long var2 asm (REG1);
> -  long var3;
> -  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> -  return var1 + var3;
> -}
> diff --git a/gcc/testsuite/gcc.dg/pr87600-3.c b/gcc/testsuite/gcc.dg/pr87600-3.c
> new file mode 100644
> index 00000000000..2673d004130
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr87600-3.c
> @@ -0,0 +1,35 @@
> +/* PR rtl-optimization/87600  */
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-O2" } */
> +
> +#include "pr87600.h"
> +
> +/* The following are all invalid uses of local register variables.  */
> +
> +long
> +test1 (void)
> +{
> +  register long var1 asm (REG1);
> +  register long var2 asm (REG2);
> +  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
> +  return var1;
> +}
> +
> +long
> +test2 (void)
> +{
> +  register long var1 asm (REG1);
> +  register long var2 asm (REG1);
> +  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> +  return var1;
> +}
> +
> +long
> +test3 (void)
> +{
> +  register long var1 asm (REG1);
> +  register long var2 asm (REG1);
> +  long var3;
> +  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> +  return var1 + var3;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> new file mode 100644
> index 00000000000..671c0ede6ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> @@ -0,0 +1,103 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** test_in_1:
> +**     foo	%r2
> +**     br	%r14
> +*/
> +
> +int
> +test_in_1 (int x)
> +{
> +  asm ("foo	%0" :: "{r2}" (x));
> +  return x;
> +}
> +
> +/*
> +** test_in_2:
> +**     lgr	(%r[0-9]+),%r2
> +**     lr	%r2,%r3
> +**     foo	%r2
> +**     lgr	%r2,\1
> +**     br	%r14
> +*/
> +
> +int
> +test_in_2 (int x, int y)
> +{
> +  asm ("foo	%0" :: "{r2}" (y));
> +  return x;
> +}
> +
> +/*
> +** test_in_3:
> +**     stmg	%r12,%r15,96\(%r15\)
> +**     lay	%r15,-160\(%r15\)
> +**     lgr	(%r[0-9]+),%r2
> +**     ahi	%r2,1
> +**     lgfr	%r2,%r2
> +**     brasl	%r14,foo@PLT
> +**     lr	%r3,%r2
> +**     lr	%r2,\1
> +**     foo	%r3,%r2
> +**     lgr	%r2,\1
> +**     lmg	%r12,%r15,256\(%r15\)
> +**     br	%r14
> +*/
> +
> +extern int foo (int);
> +
> +int
> +test_in_3 (int x)
> +{
> +  asm ("foo	%0,%1\n" :: "{r3}" (foo (x + 1)), "{r2}" (x));
> +  return x;
> +}
> +
> +/*
> +** test_out_1:
> +**     foo	%r3
> +**     lgfr	%r2,%r3
> +**     br	%r14
> +*/
> +
> +int
> +test_out_1 (void)
> +{
> +  int x;
> +  asm ("foo	%0" : "={r3}" (x));
> +  return x;
> +}
> +
> +/*
> +** test_out_2:
> +**     lgr	(%r[0-9]+),%r2
> +**     foo	%r2
> +**     ark	(%r[0-9]+),\1,%r2
> +**     lgfr	%r2,\2
> +**     br	%r14
> +*/
> +
> +int
> +test_out_2 (int x)
> +{
> +  int y;
> +  asm ("foo	%0" : "={r2}" (y));
> +  return x + y;
> +}
> +
> +/*
> +** test_inout_1:
> +**     foo	%r2
> +**     lgfr	%r2,%r2
> +**     br	%r14
> +*/
> +
> +int
> +test_inout_1 (int x)
> +{
> +  asm ("foo	%0" : "+{r2}" (x));
> +  return x;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> new file mode 100644
> index 00000000000..a892fe8f0aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1078523331\n} } } */
> +
> +
> +/*
> +** test_float_into_gpr:
> +**     lrl	%r4,.LC0
> +**     foo	%r4
> +**     br	%r14
> +*/
> +
> +void
> +test_float_into_gpr (void)
> +{
> +  // This is the counterpart to
> +  //   register float x asm ("r4") = 3.14f;
> +  //   asm ("foo	%0" :: "r" (x));
> +  // where the bit-pattern of 3.14f is loaded into GPR.
> +  asm ("foo	%0" :: "{r4}" (3.14f));
> +}
> +
> +/*
> +** test_float:
> +** (
> +**     ldr	%f4,%f0
> +**     ldr	%f5,%f2
> +** |
> +**     ldr	%f5,%f2
> +**     ldr	%f4,%f0
> +** )
> +**     aebr	%f5,%f4
> +**     ldr	%f0,%f5
> +**     br	%r14
> +*/
> +
> +float
> +test_float (float x, float y)
> +{
> +  asm ("aebr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
> +  return y;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> new file mode 100644
> index 00000000000..5df37b5b717
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1074339512\n\t\.long\t1374389535\n} } } */
> +
> +/*
> +** test_double_into_gpr:
> +**     lgrl	%r4,.LC0
> +**     foo	%r4
> +**     br	%r14
> +*/
> +
> +void
> +test_double_into_gpr (void)
> +{
> +  // This is the counterpart to
> +  //   register double x asm ("r4") = 3.14;
> +  //   asm ("foo	%0" :: "r" (x));
> +  // where the bit-pattern of 3.14 is loaded into GPR.
> +  asm ("foo	%0" :: "{r4}" (3.14));
> +}
> +
> +/*
> +** test_double:
> +** (
> +**     ldr	%f4,%f0
> +**     ldr	%f5,%f2
> +** |
> +**     ldr	%f5,%f2
> +**     ldr	%f4,%f0
> +** )
> +**     adbr	%f5,%f4
> +**     ldr	%f0,%f5
> +**     br	%r14
> +*/
> +
> +double
> +test_double (double x, double y)
> +{
> +  asm ("adbr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
> +  return y;
> +}
> diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
> index 42c719c512c..3c28ccec955 100644
> --- a/gcc/testsuite/lib/scanasm.exp
> +++ b/gcc/testsuite/lib/scanasm.exp
> @@ -896,6 +896,10 @@ proc configure_check-function-bodies { config } {
>   	set up_config(fluff) {^\s*(?://)}
>       } elseif { [istarget *-*-darwin*] } {
>   	set up_config(fluff) {^\s*(?:\.|//|@)|^L[0-9ABCESV]}
> +    } elseif { [istarget s390*-*-*] } {
> +	# In addition to the defaults, skip lines beginning with a '#', which
> +	# result from inline asm.
> +	set up_config(fluff) {^\s*(?:\.|//|@|$|#)}
>       } else {
>   	# Skip lines beginning with labels ('.L[...]:') or other directives
>   	# ('.align', '.cfi_startproc', '.quad [...]', '.text', etc.), '//' or
Stefan Schulze Frielinghaus Aug. 5, 2024, 1:59 p.m. UTC | #2
On Mon, Aug 05, 2024 at 02:19:50PM +0200, Georg-Johann Lay wrote:
> Am 05.08.24 um 12:28 schrieb Stefan Schulze Frielinghaus:
> > This is a follow-up of
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654013.html
> > 
> > What has changed?
> > 
> > - Rebased and fixed an issue in constrain_operands which manifested
> > after late-combine.
> > 
> > - Introduced new test cases for Arm, Intel, POWER, RISCV, S/390 for 32-
> > and 64-bit where appropriate (including register pairs etc.).  Test
> > gcc.dg/asm-hard-reg-7.c is a bit controversial since I'm testing for an
> > anti feature here, i.e., I'm testing for register asm in conjunction
> > with calls.  I'm fine with removing it in the end but I wanted to keep
> > it in for demonstration purposes at least during discussion of this
> > patch.
> > 
> > - Split test pr87600-2.c into pr87600-2.c and pr87600-3.c since test0
> > errors out early, now.  Otherwise, the remaining errors would not be
> > reported.  Beside that the error message has slightly changed.
> > 
> > - Modified genoutput.cc in order to allow hard register constraints in
> > machine descriptions.  For example, on s390 the instruction mvcrl makes
> 
> As I already said, such a feature would be great.  Some questions:
> 
> Which pass is satisfying that constraint? AFAIK for local reg vars,
> it is asmcons, but for register constraints in md it is the register
> allocator.

This is done by reload during process_alt_operands().  Basically
every other change in gimplify.cc, stmt.cc etc. is only there in order
to do some error checking and have some proper diagnostics.

> The avr backend has many insns that use explicit hard regs in order to
> model some libcalls (ones with footprints smaller than ABI, or that
> deviate from the ABI).  A proper way would be to add a register
> constraint for each possible hard reg, e.g. R20_1 for QImode in R20,
> R20_2 for HImode in R20, etc.  This would require a dozen or more
> new register classes, and the problem with that is that register
> allocation produces less efficient code even for cases that do
> not use these new constraints.  So I gave up that approach.
> 
> How does your feature work? Does it imply that for each hreg
> constraint there must be an according register class?

No.  During reload I limit the set of registers by installing a filter
and let RA solve it.

> 
> Obviously local reg vars don't require respective reg classes,
> so I thought about representing such insns as asm_input or
> whatever, but that's pure hack and would never pass a review...
> 
> > use of the implicit register r0 which we currently deal with as follows:
> > 
> > (define_insn "*mvcrl"
> >    [(set (match_operand:BLK 0 "memory_operand" "=Q")
> >         (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
> >                      (reg:SI GPR0_REGNUM)]
> >                     UNSPEC_MVCRL))]
> >    "TARGET_Z15"
> >    "mvcrl\t%0,%1"
> >    [(set_attr "op_type" "SSE")])
> > 
> > (define_expand "mvcrl"
> >    [(set (reg:SI GPR0_REGNUM) (match_operand:SI 2 "general_operand"))
> >     (set (match_operand:BLK 0 "memory_operand" "=Q")
> >         (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
> >                      (reg:SI GPR0_REGNUM)]
> >                     UNSPEC_MVCRL))]
> >    "TARGET_Z15"
> >    "")
> > 
> > In the expander we ensure that GPR0 is setup correctly.  With this patch
> > we could simply write
> > 
> > (define_insn "mvcrl"
> >    [(set (match_operand:BLK 0 "memory_operand" "=Q")
> >          (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
> >                       (match_operand:SI 2 "general_operand" "{r0}")]
> >                      UNSPEC_MVCRL))]
> >    "TARGET_Z15"
> >    "mvcrl\t%0,%1"
> >    [(set_attr "op_type" "SSE")])
> > 
> > What I dislike is that I didn't find a way to verify hard register names
> 
> Are plain register numbers also supported? Like "{0}" ?
> (Provided regno(r0) == 0).

Basically whatever passes decode_reg_name() is allowed.

> 
> > during genoutput, i.e., ensuring that the name is valid after all.  This
> > is due to the fact how reg_names is defined which cannot be accessed by
> > genoutput.  The same holds true for REGISTER_NAMES et al. which may
> > reference some target specific variable (see e.g. POWER).  Thus, in case
> > of an invalid register name in a machine description file we do not
> > end-up with a genoutput-time error but instead fail at run-time in
> > process_alt_operands():
> > 
> >     case '{':
> >         {
> >           int regno = parse_constraint_regname (p);
> >           gcc_assert (regno >= 0);
> >           cl = REGNO_REG_CLASS (regno);
> >           CLEAR_HARD_REG_SET (hregset);
> >           SET_HARD_REG_BIT (hregset, regno);
> 
> Is this correct when hard_regno_nregs(regno) > 1,
> i.e. when the register occupies more than one hard register?

This is the actual place where the hard register constraint manifests
(besides all the error handling), namely by restricting the possible set
of registers via a filter.  If a value goes into a register pair, then I
still only want the "base" register to be available.  At least this was
my understanding of how reload handles register pairs.

> 
> >           cl_filter = &hregset;
> >           goto reg;
> >         }
> > 
> > This is rather unfortunate but I couldn't find a way how to validate
> > register names during genoutput.  If no one else has an idea I will
> > replace gcc_assert with a more expressive error message.
> 
> [ADDITIONAL_]REGISTER_NAMES isn't available?  Though using that might
> bypass the effect of target hooks like TARGET_CONDITIONAL_REGISTER_USAGE.

REGISTER_NAMES sometimes references target variables (see rs6000 e.g.)
which aren't linked into genoutput and are therefore unavailable.

> 
> But there are also cases with an asm operand print modifier; you cannot
> check that, it's checked by TARGET_PRINT_OPERAND etc. which get a
> hard register and not a string for a register name.
> 
> Maybe genoutput could add additional information to insn-output.cc or
> whatever, and the compiler proper checks that and emits diagnostics
> as needed?

Though, this would be a run-time check, right?  I was actually hoping
for a "compile-time" check, i.e., something which errors while compiling
GCC and not when GCC is executed.  The latter is already implemented.

> 
> > What's next?
> > 
> > I was thinking about replacing register asm with the new hard register
> > constraint.  This would solve problems like demonstrated by
> > gcc.dg/asm-hard-reg-7.c.  For example, we could replace the constraint
> > 
> >     register int x asm ("r5") = 42;
> >     asm ("foo   %0" :: "r" (x));
> > 
> > with
> > 
> >     register int x asm ("r5") = 42;
> >     asm ("foo   %0" :: "{r5}" (x));
> > 
> > and ignore any further effect of the register asm.  However, I haven't
> > really thought this through and there are certainly cases which are
> > currently allowed which cannot trivially be converted as e.g. here:
> > 
> >     register int x asm ("r5") = 42;
> >     asm ("foo   %0" :: "rd" (x));
> > 
> > Multiple alternatives are kind of strange in combination with register
> > asm.  For example, on s390 the two constraints "r" and "d" restrict both
> 
> Though in that example there is just one alternative "rd" which is the
> union of "r" and "d".  Inline asm doesn't support more than one
> constraint alternative.  Multiple alternatives would require multiple
> asm templates, like in
> 
> asm ("code for r", "code for d" :: "r,d" (x));

Right, I actually didn't mean to speak about multiple alternatives but
rather unions here.  I kinda struggle with what is allowed and what not
as I cannot find a rigorous definition.  At least the given example

     register int x asm ("r5") = 42;
     asm ("foo   %0" :: "rd" (x));

compiles without any warning which is why I was assuming this is valid
code.  Of course, not everything which compiles is valid ;-) That being
said, I'm currently trying to figure out where I should error out and
where not.

> 
> And a final question: Is it possible to specify more than one hard
> reg constraint like in "{r0}{r1}" as the union of r0 and r1?

Yes, you could write

int
times_two (int x)
{
  asm ("agr     %0,%0" : "+{r1}{r2}{r3}" (x));
  return x;
}

and reload would choose r2, which is optimal on s390 since parameter
x is passed via r2 and the return value will be in r2, too.

However, I'm still wondering whether we really want to support unions or
not for hard register constraints.

Cheers,
Stefan

> 
> Johann
> 
> > to GPRs.  That is not a show stopper but certainly something which needs
> > some consideration.  If you can think of some wild combinations/edge
> > cases I would be happy to hear about them.  Anyhow, this is something for a
> > further patch.
> > 
> > Last but not least, if there is enough consent to accept this feature, I
> > will start writing up some documentation.
> > 
> > Bootstrapped and regtested on Arm, Intel, POWER, RISCV, S/390.  I have
> > only verified the 32-bit tests via cross compilers and didn't execute
> > them in contrast to 64-bit targets.
> > ---
> >   gcc/cfgexpand.cc                              |  42 -----
> >   gcc/genoutput.cc                              |  12 ++
> >   gcc/genpreds.cc                               |   4 +-
> >   gcc/gimplify.cc                               | 134 ++++++++++++++-
> >   gcc/lra-constraints.cc                        |  13 ++
> >   gcc/recog.cc                                  |  11 +-
> >   gcc/stmt.cc                                   | 155 +++++++++++++++++-
> >   gcc/stmt.h                                    |  12 +-
> >   gcc/testsuite/gcc.dg/asm-hard-reg-1.c         |  85 ++++++++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-2.c         |  33 ++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-3.c         |  25 +++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-4.c         |  50 ++++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-5.c         |  36 ++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-6.c         |  60 +++++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-7.c         |  70 ++++++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c   |  67 ++++++++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c   |  19 +++
> >   gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c   |  20 +++
> >   gcc/testsuite/gcc.dg/pr87600-2.c              |  30 +---
> >   gcc/testsuite/gcc.dg/pr87600-3.c              |  35 ++++
> >   .../gcc.target/s390/asm-hard-reg-1.c          | 103 ++++++++++++
> >   .../gcc.target/s390/asm-hard-reg-2.c          |  43 +++++
> >   .../gcc.target/s390/asm-hard-reg-3.c          |  42 +++++
> >   gcc/testsuite/lib/scanasm.exp                 |   4 +
> >   24 files changed, 1020 insertions(+), 85 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> >   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> >   create mode 100644 gcc/testsuite/gcc.dg/pr87600-3.c
> >   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> >   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> >   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> > 
> > diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> > index dad3ae1b7c6..8bdc530626c 100644
> > --- a/gcc/cfgexpand.cc
> > +++ b/gcc/cfgexpand.cc
> > @@ -2966,44 +2966,6 @@ expand_asm_loc (tree string, int vol, location_t locus)
> >     emit_insn (body);
> >   }
> > -/* Return the number of times character C occurs in string S.  */
> > -static int
> > -n_occurrences (int c, const char *s)
> > -{
> > -  int n = 0;
> > -  while (*s)
> > -    n += (*s++ == c);
> > -  return n;
> > -}
> > -
> > -/* A subroutine of expand_asm_operands.  Check that all operands have
> > -   the same number of alternatives.  Return true if so.  */
> > -
> > -static bool
> > -check_operand_nalternatives (const vec<const char *> &constraints)
> > -{
> > -  unsigned len = constraints.length();
> > -  if (len > 0)
> > -    {
> > -      int nalternatives = n_occurrences (',', constraints[0]);
> > -
> > -      if (nalternatives + 1 > MAX_RECOG_ALTERNATIVES)
> > -	{
> > -	  error ("too many alternatives in %<asm%>");
> > -	  return false;
> > -	}
> > -
> > -      for (unsigned i = 1; i < len; ++i)
> > -	if (n_occurrences (',', constraints[i]) != nalternatives)
> > -	  {
> > -	    error ("operand constraints for %<asm%> differ "
> > -		   "in number of alternatives");
> > -	    return false;
> > -	  }
> > -    }
> > -  return true;
> > -}
> > -
> >   /* Check for overlap between registers marked in CLOBBERED_REGS and
> >      anything inappropriate in T.  Emit error and return the register
> >      variable definition for error, NULL_TREE for ok.  */
> > @@ -3169,10 +3131,6 @@ expand_asm_stmt (gasm *stmt)
> >   	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
> >       }
> > -  /* ??? Diagnose during gimplification?  */
> > -  if (! check_operand_nalternatives (constraints))
> > -    return;
> > -
> >     /* Count the number of meaningful clobbered registers, ignoring what
> >        we would ignore later.  */
> >     auto_vec<rtx> clobber_rvec;
> > diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
> > index efd81766bb5..c1efb043579 100644
> > --- a/gcc/genoutput.cc
> > +++ b/gcc/genoutput.cc
> > @@ -1219,6 +1219,18 @@ mdep_constraint_len (const char *s, file_location loc, int opno)
> >         if (!strncmp (s, p->name, p->namelen))
> >   	return p->namelen;
> > +  if (*s == '{')
> > +    {
> > +      const char *end = s + 1;
> > +      while (*end != '}' && *end != '"' && *end != '\0')
> > +	++end;
> > +      /* Similarly as in parse_constraint_regname(), consider any hard register
> > +	 name longer than a few characters as an error.  */
> > +      ptrdiff_t len = end - s;
> > +      if (*end == '}' && len > 1 && len < 31)
> > +	return len + 1;
> > +    }
> > +
> >     error_at (loc, "error: undefined machine-specific constraint "
> >   	    "at this point: \"%s\"", s);
> >     message_at (loc, "note:  in operand %d", opno);
> > diff --git a/gcc/genpreds.cc b/gcc/genpreds.cc
> > index 55d149e8a40..0777cb7a4db 100644
> > --- a/gcc/genpreds.cc
> > +++ b/gcc/genpreds.cc
> > @@ -1148,7 +1148,7 @@ write_insn_constraint_len (void)
> >     unsigned int i;
> >     puts ("static inline size_t\n"
> > -	"insn_constraint_len (char fc, const char *str ATTRIBUTE_UNUSED)\n"
> > +	"insn_constraint_len (char fc, const char *str)\n"
> >   	"{\n"
> >   	"  switch (fc)\n"
> >   	"    {");
> > @@ -1181,6 +1181,8 @@ write_insn_constraint_len (void)
> >     puts ("    default: break;\n"
> >   	"    }\n"
> > +	"  if (str[0] == '{')\n"
> > +	"      return ((const char *) rawmemchr (str + 1, '}') - str) + 1;\n"
> >   	"  return 1;\n"
> >   	"}\n");
> >   }
> > diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> > index ab323d764e8..abb705d49ac 100644
> > --- a/gcc/gimplify.cc
> > +++ b/gcc/gimplify.cc
> > @@ -70,6 +70,9 @@ along with GCC; see the file COPYING3.  If not see
> >   #include "omp-offload.h"
> >   #include "context.h"
> >   #include "tree-nested.h"
> > +#include "insn-config.h"
> > +#include "recog.h"
> > +#include "output.h"
> >   /* Identifier for a basic condition, mapping it to other basic conditions of
> >      its Boolean expression.  Basic conditions given the same uid (in the same
> > @@ -6993,6 +6996,42 @@ gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >     return ret;
> >   }
> > +/* Return the number of times character C occurs in string S.  */
> > +
> > +static int
> > +num_occurrences (int c, const char *s)
> > +{
> > +  int n = 0;
> > +  while (*s)
> > +    n += (*s++ == c);
> > +  return n;
> > +}
> > +
> > +/* A subroutine of gimplify_asm_expr.  Check that all operands have
> > +   the same number of alternatives.  Return -1 if this is violated.  Otherwise
> > +   return the number of alternatives.  */
> > +
> > +static int
> > +num_alternatives (const_tree link)
> > +{
> > +  if (link == nullptr)
> > +    return 0;
> > +
> > +  const char *constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> > +  int num = num_occurrences (',', constraint);
> > +
> > +  if (num + 1 > MAX_RECOG_ALTERNATIVES)
> > +    return -1;
> > +
> > +  for (link = TREE_CHAIN (link); link; link = TREE_CHAIN (link))
> > +    {
> > +      constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> > +      if (num_occurrences (',', constraint) != num)
> > +	return -1;
> > +    }
> > +  return num + 1;
> > +}
> > +
> >   /* Gimplify the operands of an ASM_EXPR.  Input operands should be a gimple
> >      value; output operands should be a gimple lvalue.  */
> > @@ -7023,6 +7062,36 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >     clobbers = NULL;
> >     labels = NULL;
> > +  int num_alternatives_out = num_alternatives (ASM_OUTPUTS (expr));
> > +  int num_alternatives_in = num_alternatives (ASM_INPUTS (expr));
> > +  if (num_alternatives_out == -1 || num_alternatives_in == -1
> > +      || (num_alternatives_out > 0 && num_alternatives_in > 0
> > +	  && num_alternatives_out != num_alternatives_in))
> > +    {
> > +      error ("operand constraints for %<asm%> differ "
> > +	     "in number of alternatives");
> > +      return GS_ERROR;
> > +    }
> > +  int num_alternatives = MAX (num_alternatives_out, num_alternatives_in);
> > +
> > +  /* Regarding hard register constraints ensure that each hard register is used
> > +     at most once over all inputs/outputs and each alternative.  Keep track in
> > +     hardregs[0] which hard register is used via an asm register over all
> > +     inputs/outputs.  hardregs[i] for i >= 2 describes which hard registers are
> > +     used for alternative i-2 over all inputs/outputs.  hardregs[1] is a
> > +     reduction of all alternatives, i.e., hardregs[1] |= hardregs[i] for i >= 2
> > +     and describes whether a hard register is used in any alternative.  This is
> > +     just a shortcut instead of recomputing the union over all alternatives;
> > +     possibly multiple times.  */
> > +  auto_vec<HARD_REG_SET> hardregs (num_alternatives + 2);
> > +  std::pair <vec <HARD_REG_SET> *, machine_mode> hardreg_props = {&hardregs, VOIDmode};
> > +  for (int i = 0; i < num_alternatives + 2; ++i)
> > +    {
> > +      HARD_REG_SET hregset;
> > +      CLEAR_HARD_REG_SET (hregset);
> > +      hardregs.quick_push (hregset);
> > +    }
> > +
> >     ret = GS_ALL_DONE;
> >     link_next = NULL_TREE;
> >     for (i = 0, link = ASM_OUTPUTS (expr); link; ++i, link = link_next)
> > @@ -7039,8 +7108,13 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >         if (constraint_len == 0)
> >           continue;
> > -      ok = parse_output_constraint (&constraint, i, 0, 0,
> > -				    &allows_mem, &allows_reg, &is_inout);
> > +      tree outtype = TREE_TYPE (TREE_VALUE (link));
> > +      auto hardreg_props_p
> > +	= outtype != error_mark_node
> > +	? (hardreg_props.second = TYPE_MODE (outtype), &hardreg_props)
> > +	: nullptr;
> > +      ok = parse_output_constraint (&constraint, i, 0, 0, &allows_mem,
> > +				    &allows_reg, &is_inout, hardreg_props_p);
> >         if (!ok)
> >   	{
> >   	  ret = GS_ERROR;
> > @@ -7049,7 +7123,6 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >         /* If we can't make copies, we can only accept memory.
> >   	 Similarly for VLAs.  */
> > -      tree outtype = TREE_TYPE (TREE_VALUE (link));
> >         if (outtype != error_mark_node
> >   	  && (TREE_ADDRESSABLE (outtype)
> >   	      || !COMPLETE_TYPE_P (outtype)
> > @@ -7111,6 +7184,24 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >   	      TREE_VALUE (link) = tem;
> >   	      tret = GS_OK;
> >   	    }
> > +	  if (VAR_P (op) && DECL_HARD_REGISTER (op))
> > +	    {
> > +	      tree id = DECL_ASSEMBLER_NAME (op);
> > +	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
> > +	      int hardreg = decode_reg_name (asmspec);
> > +	      if (hardreg >= 0)
> > +		{
> > +		  if (TEST_HARD_REG_BIT (hardregs[0], hardreg)
> > +		      || TEST_HARD_REG_BIT (hardregs[1], hardreg))
> > +		    {
> > +		      error ("multiple outputs to hard register: %s",
> > +			     reg_names[hardreg]);
> > +		      return GS_ERROR;
> > +		    }
> > +		  else
> > +		    SET_HARD_REG_BIT (hardregs[0], hardreg);
> > +		}
> > +	    }
> >   	}
> >         vec_safe_push (outputs, link);
> > @@ -7210,16 +7301,29 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >   	}
> >       }
> > +  for (unsigned int i = 0; i < hardregs.length (); ++i)
> > +    CLEAR_HARD_REG_SET (hardregs[i]);
> > +
> >     link_next = NULL_TREE;
> >     for (link = ASM_INPUTS (expr); link; ++i, link = link_next)
> >       {
> >         link_next = TREE_CHAIN (link);
> >         constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> > -      parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> > -			      oconstraints, &allows_mem, &allows_reg);
> > +      tree intype = TREE_TYPE (TREE_VALUE (link));
> > +      auto hardreg_props_p
> > +	= intype != error_mark_node
> > +	? (hardreg_props.second = TYPE_MODE (intype), &hardreg_props)
> > +	: nullptr;
> > +      bool ok = parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> > +					oconstraints, &allows_mem, &allows_reg,
> > +					hardreg_props_p);
> > +      if (!ok)
> > +	{
> > +	  ret = GS_ERROR;
> > +	  is_inout = false;
> > +	}
> >         /* If we can't make copies, we can only accept memory.  */
> > -      tree intype = TREE_TYPE (TREE_VALUE (link));
> >         if (intype != error_mark_node
> >   	  && (TREE_ADDRESSABLE (intype)
> >   	      || !COMPLETE_TYPE_P (intype)
> > @@ -7290,6 +7394,24 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
> >   				is_gimple_asm_val, fb_rvalue);
> >   	  if (tret == GS_ERROR)
> >   	    ret = tret;
> > +	  tree inputv = TREE_VALUE (link);
> > +	  if (VAR_P (inputv) && DECL_HARD_REGISTER (inputv))
> > +	    {
> > +	      tree id = DECL_ASSEMBLER_NAME (inputv);
> > +	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
> > +	      int hardreg = decode_reg_name (asmspec);
> > +	      if (hardreg >= 0)
> > +		{
> > +		  if (TEST_HARD_REG_BIT (hardregs[1], hardreg))
> > +		    {
> > +		      error ("multiple inputs to hard register: %s",
> > +			     reg_names[hardreg]);
> > +		      return GS_ERROR;
> > +		    }
> > +		  else
> > +		    SET_HARD_REG_BIT (hardregs[0], hardreg);
> > +		}
> > +	    }
> >   	}
> >         TREE_CHAIN (link) = NULL_TREE;
> > diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
> > index 92b343fa99a..632c75ef01c 100644
> > --- a/gcc/lra-constraints.cc
> > +++ b/gcc/lra-constraints.cc
> > @@ -114,6 +114,7 @@
> >   #include "target.h"
> >   #include "rtl.h"
> >   #include "tree.h"
> > +#include "stmt.h"
> >   #include "predict.h"
> >   #include "df.h"
> >   #include "memmodel.h"
> > @@ -2165,6 +2166,7 @@ process_alt_operands (int only_alternative)
> >     bool costly_p;
> >     enum reg_class cl;
> >     const HARD_REG_SET *cl_filter;
> > +  HARD_REG_SET hregset;
> >     /* Calculate some data common for all alternatives to speed up the
> >        function.	*/
> > @@ -2536,6 +2538,17 @@ process_alt_operands (int only_alternative)
> >   		  cl_filter = nullptr;
> >   		  goto reg;
> > +		case '{':
> > +		    {
> > +		      int regno = decode_hreg_constraint (p);
> > +		      gcc_assert (regno >= 0);
> > +		      cl = REGNO_REG_CLASS (regno);
> > +		      CLEAR_HARD_REG_SET (hregset);
> > +		      SET_HARD_REG_BIT (hregset, regno);
> > +		      cl_filter = &hregset;
> > +		      goto reg;
> > +		    }
> > +
> >   		default:
> >   		  cn = lookup_constraint (p);
> >   		  switch (get_constraint_type (cn))
> > diff --git a/gcc/recog.cc b/gcc/recog.cc
> > index 54b317126c2..b604029d5f1 100644
> > --- a/gcc/recog.cc
> > +++ b/gcc/recog.cc
> > @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
> >   #include "target.h"
> >   #include "rtl.h"
> >   #include "tree.h"
> > +#include "stmt.h"
> >   #include "cfghooks.h"
> >   #include "df.h"
> >   #include "memmodel.h"
> > @@ -2333,7 +2334,8 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints)
> >   	    {
> >   	    case CT_REGISTER:
> >   	      if (!result
> > -		  && reg_class_for_constraint (cn) != NO_REGS
> > +		  && (reg_class_for_constraint (cn) != NO_REGS
> > +		      || constraint[0] == '{')
> >   		  && GET_MODE (op) != BLKmode
> >   		  && register_operand (op, VOIDmode))
> >   		result = 1;
> > @@ -3267,6 +3269,13 @@ constrain_operands (int strict, alternative_mask alternatives)
> >   		  win = true;
> >   		break;
> > +	      case '{':
> > +		if ((REG_P (op) && HARD_REGISTER_P (op)
> > +		     && (int) REGNO (op) == decode_hreg_constraint (p))
> > +		    || !reload_completed)
> > +		  win = true;
> > +		break;
> > +
> >   	      default:
> >   		{
> >   		  enum constraint_num cn = lookup_constraint (p);
> > diff --git a/gcc/stmt.cc b/gcc/stmt.cc
> > index ae1527f0a19..7b073f8ce85 100644
> > --- a/gcc/stmt.cc
> > +++ b/gcc/stmt.cc
> > @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
> >   #include "emit-rtl.h"
> >   #include "pretty-print.h"
> >   #include "diagnostic-core.h"
> > +#include "output.h"
> >   #include "fold-const.h"
> >   #include "varasm.h"
> > @@ -174,6 +175,77 @@ expand_label (tree label)
> >       maybe_set_first_label_num (label_r);
> >   }
> >   
> > +/* Parse a hard register constraint and return its number or -1 in case of an
> > +   error.  BEGIN should point to a string of the form "{regname}".  For the
> > +   sake of simplicity assume that a register name is not longer than 31
> > +   characters, if not error out.  */
> > +
> > +int
> > +decode_hreg_constraint (const char *begin)
> > +{
> > +  if (*begin != '{')
> > +    return -1;
> > +  ++begin;
> > +  const char *end = begin;
> > +  while (*end != '}' && *end != '\0')
> > +    ++end;
> > +  if (*end != '}' || end == begin)
> > +    return -1;
> > +  ptrdiff_t len = end - begin;
> > +  if (len >= 31)
> > +    return -1;
> > +  char regname[32];
> > +  memcpy (regname, begin, len);
> > +  regname[len] = '\0';
> > +  int regno = decode_reg_name (regname);
> > +  return regno;
> > +}
> > +
> > +static bool
> > +eliminable_regno_p (int regnum)
> > +{
> > +  static const struct
> > +  {
> > +    const int from;
> > +    const int to;
> > +  } eliminables[] = ELIMINABLE_REGS;
> > +  for (size_t i = 0; i < ARRAY_SIZE (eliminables); i++)
> > +    if (regnum == eliminables[i].from)
> > +      return true;
> > +  return false;
> > +}
> > +
> > +/* Perform a similar check as done in make_decl_rtl().  */
> > +
> > +static bool
> > +hardreg_ok_p (int reg_number, machine_mode mode, int operand_num)
> > +{
> > +  if (mode == BLKmode)
> > +    error ("data type isn%'t suitable for register %s of operand %i",
> > +	   reg_names[reg_number], operand_num);
> > +  else if (!in_hard_reg_set_p (accessible_reg_set, mode, reg_number))
> > +    error ("register %s for operand %i cannot be accessed"
> > +	   " by the current target", reg_names[reg_number], operand_num);
> > +  else if (!in_hard_reg_set_p (operand_reg_set, mode, reg_number))
> > +    error ("register %s for operand %i is not general enough"
> > +	   " to be used as a register variable", reg_names[reg_number], operand_num);
> > +  else if (!targetm.hard_regno_mode_ok (reg_number, mode))
> > +    error ("register %s for operand %i isn%'t suitable for data type",
> > +	   reg_names[reg_number], operand_num);
> > +  else if (reg_number != HARD_FRAME_POINTER_REGNUM
> > +	   && (reg_number == FRAME_POINTER_REGNUM
> > +#ifdef RETURN_ADDRESS_POINTER_REGNUM
> > +	       || reg_number == RETURN_ADDRESS_POINTER_REGNUM
> > +#endif
> > +	       || reg_number == ARG_POINTER_REGNUM)
> > +	   && eliminable_regno_p (reg_number))
> > +    error ("register for operand %i is an internal GCC "
> > +	   "implementation detail", operand_num);
> > +  else
> > +    return true;
> > +  return false;
> > +}
> > +
> >   /* Parse the output constraint pointed to by *CONSTRAINT_P.  It is the
> >      OPERAND_NUMth output operand, indexed from zero.  There are NINPUTS
> >      inputs and NOUTPUTS outputs to this extended-asm.  Upon return,
> > @@ -190,7 +262,9 @@ expand_label (tree label)
> >   bool
> >   parse_output_constraint (const char **constraint_p, int operand_num,
> >   			 int ninputs, int noutputs, bool *allows_mem,
> > -			 bool *allows_reg, bool *is_inout)
> > +			 bool *allows_reg, bool *is_inout,
> > +			 const std::pair <vec <HARD_REG_SET> *, machine_mode>
> > +			 *hardreg_props)
> >   {
> >     const char *constraint = *constraint_p;
> >     const char *p;
> > @@ -244,6 +318,8 @@ parse_output_constraint (const char **constraint_p, int operand_num,
> >         constraint = *constraint_p;
> >       }
> > +  unsigned int alternative = 2;
> > +
> >     /* Loop through the constraint string.  */
> >     for (p = constraint + 1; *p; )
> >       {
> > @@ -268,7 +344,11 @@ parse_output_constraint (const char **constraint_p, int operand_num,
> >   	case 'E':  case 'F':  case 'G':  case 'H':
> >   	case 's':  case 'i':  case 'n':
> >   	case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
> > -	case 'N':  case 'O':  case 'P':  case ',':
> > +	case 'N':  case 'O':  case 'P':
> > +	  break;
> > +
> > +	case ',':
> > +	  ++alternative;
> >   	  break;
> >   	case '0':  case '1':  case '2':  case '3':  case '4':
> > @@ -289,6 +369,36 @@ parse_output_constraint (const char **constraint_p, int operand_num,
> >   	  *allows_mem = true;
> >   	  break;
> > +	case '{':
> > +	  {
> > +	    int regno = decode_hreg_constraint (p);
> > +	    if (regno < 0)
> > +	      {
> > +		error ("invalid output constraint: %s", p);
> > +		return false;
> > +	      }
> > +	    if (hardreg_props)
> > +	      {
> > +		vec<HARD_REG_SET> *hardregs = hardreg_props->first;
> > +		if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
> > +		    || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
> > +		  {
> > +		    error ("multiple outputs to hard register: %s",
> > +			   reg_names[regno]);
> > +		    return false;
> > +		  }
> > +		else
> > +		  {
> > +		    SET_HARD_REG_BIT ((*hardregs)[1], regno);
> > +		    SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
> > +		  }
> > +		if (!hardreg_ok_p (regno, hardreg_props->second, operand_num))
> > +		  return false;
> > +	      }
> > +	    *allows_reg = true;
> > +	    break;
> > +	  }
> > +
> >   	default:
> >   	  if (!ISALPHA (*p))
> >   	    break;
> > @@ -317,7 +427,9 @@ bool
> >   parse_input_constraint (const char **constraint_p, int input_num,
> >   			int ninputs, int noutputs, int ninout,
> >   			const char * const * constraints,
> > -			bool *allows_mem, bool *allows_reg)
> > +			bool *allows_mem, bool *allows_reg,
> > +			const std::pair <vec<HARD_REG_SET> *, machine_mode>
> > +			*hardreg_props)
> >   {
> >     const char *constraint = *constraint_p;
> >     const char *orig_constraint = constraint;
> > @@ -332,6 +444,8 @@ parse_input_constraint (const char **constraint_p, int input_num,
> >     /* Make sure constraint has neither `=', `+', nor '&'.  */
> > +  unsigned int alternative = 2;
> > +
> >     for (j = 0; j < c_len; j += CONSTRAINT_LEN (constraint[j], constraint+j))
> >       switch (constraint[j])
> >         {
> > @@ -358,7 +472,11 @@ parse_input_constraint (const char **constraint_p, int input_num,
> >         case 'E':  case 'F':  case 'G':  case 'H':
> >         case 's':  case 'i':  case 'n':
> >         case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
> > -      case 'N':  case 'O':  case 'P':  case ',':
> > +      case 'N':  case 'O':  case 'P':
> > +	break;
> > +
> > +      case ',':
> > +	++alternative;
> >   	break;
> >   	/* Whether or not a numeric constraint allows a register is
> > @@ -408,6 +526,35 @@ parse_input_constraint (const char **constraint_p, int input_num,
> >   	*allows_mem = true;
> >   	break;
> > +      case '{':
> > +	{
> > +	  int regno = decode_hreg_constraint (constraint + j);
> > +	  if (regno < 0)
> > +	    {
> > +	      error ("invalid input constraint: %s", constraint + j);
> > +	      return false;
> > +	    }
> > +	  if (hardreg_props)
> > +	    {
> > +	      vec <HARD_REG_SET> *hardregs = hardreg_props->first;
> > +	      if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
> > +		  || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
> > +		{
> > +		  error ("multiple inputs to hard register: %s",
> > +			    reg_names[regno]);
> > +		}
> > +	      else
> > +		{
> > +		  SET_HARD_REG_BIT ((*hardregs)[1], regno);
> > +		  SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
> > +		}
> > +	      if (!hardreg_ok_p (regno, hardreg_props->second, input_num))
> > +		return false;
> > +	    }
> > +	  *allows_reg = true;
> > +	  break;
> > +	}
> > +
> >         default:
> >   	if (! ISALPHA (constraint[j]))
> >   	  {
> > diff --git a/gcc/stmt.h b/gcc/stmt.h
> > index a2caae7121b..a380ecd8cbf 100644
> > --- a/gcc/stmt.h
> > +++ b/gcc/stmt.h
> > @@ -20,11 +20,19 @@ along with GCC; see the file COPYING3.  If not see
> >   #ifndef GCC_STMT_H
> >   #define GCC_STMT_H
> > +#include "target.h"
> > +#include "hard-reg-set.h"
> > +
> >   extern void expand_label (tree);
> >   extern bool parse_output_constraint (const char **, int, int, int,
> > -				     bool *, bool *, bool *);
> > +				     bool *, bool *, bool *,
> > +				     const std::pair <vec <HARD_REG_SET> *,
> > +						      machine_mode> * = nullptr);
> >   extern bool parse_input_constraint (const char **, int, int, int, int,
> > -				    const char * const *, bool *, bool *);
> > +				    const char * const *, bool *, bool *,
> > +				    const std::pair <vec <HARD_REG_SET> *,
> > +						     machine_mode> * = nullptr);
> > +extern int decode_hreg_constraint (const char *);
> >   extern tree resolve_asm_operand_names (tree, tree, tree, tree);
> >   #ifdef HARD_CONST
> >   /* Silly ifdef to avoid having all includers depend on hard-reg-set.h.  */
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> > new file mode 100644
> > index 00000000000..6a5a9ada45f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> > @@ -0,0 +1,85 @@
> > +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> > +
> > +#if defined (__aarch64__)
> > +# define GPR "{x4}"
> > +/* { dg-final { scan-assembler-times "foo\tx4" 8 { target { aarch64*-*-* } } } } */
> > +#elif defined (__arm__)
> > +# define GPR "{r4}"
> > +/* { dg-final { scan-assembler-times "foo\tr4" 8 { target { arm*-*-* } } } } */
> > +#elif defined (__i386__)
> > +# define GPR "{ecx}"
> > +/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { i?86-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { i?86-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "foo\t%ecx" 4 { target { i?86-*-* } } } } */
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define GPR "{r5}"
> > +/* { dg-final { scan-assembler-times "foo\t5" 8 { target { powerpc*-*-* } } } } */
> > +#elif defined (__riscv)
> > +# define GPR "{t5}"
> > +/* { dg-final { scan-assembler-times "foo\tt5" 8 { target { riscv*-*-* } } } } */
> > +#elif defined (__s390__)
> > +# define GPR "{r4}"
> > +/* { dg-final { scan-assembler-times "foo\t%r4" 8 { target { s390*-*-* } } } } */
> > +#elif defined (__x86_64__)
> > +# define GPR "{rcx}"
> > +/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { x86_64-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { x86_64-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "foo\t%ecx" 2 { target { x86_64-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
> > +#endif
> > +
> > +char
> > +test_char (char x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (x));
> > +  return x;
> > +}
> > +
> > +char
> > +test_char_from_mem (char *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (*x));
> > +  return *x;
> > +}
> > +
> > +short
> > +test_short (short x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (x));
> > +  return x;
> > +}
> > +
> > +short
> > +test_short_from_mem (short *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (*x));
> > +  return *x;
> > +}
> > +
> > +int
> > +test_int (int x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (x));
> > +  return x;
> > +}
> > +
> > +int
> > +test_int_from_mem (int *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (*x));
> > +  return *x;
> > +}
> > +
> > +long
> > +test_long (long x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (x));
> > +  return x;
> > +}
> > +
> > +long
> > +test_long_from_mem (long *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (*x));
> > +  return *x;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> > new file mode 100644
> > index 00000000000..7dabf9657cb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> > @@ -0,0 +1,33 @@
> > +/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> > +/* { dg-options "-std=c99" } we need long long */
> > +
> > +#if defined (__aarch64__)
> > +# define GPR "{x4}"
> > +/* { dg-final { scan-assembler-times "foo\tx4" 2 { target { aarch64*-*-* } } } } */
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define GPR "{r5}"
> > +/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
> > +#elif defined (__riscv)
> > +# define GPR "{t5}"
> > +/* { dg-final { scan-assembler-times "foo\tt5" 2 { target { riscv64-*-* } } } } */
> > +#elif defined (__s390__)
> > +# define GPR "{r4}"
> > +/* { dg-final { scan-assembler-times "foo\t%r4" 2 { target { s390*-*-* } } } } */
> > +#elif defined (__x86_64__)
> > +# define GPR "{rcx}"
> > +/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
> > +#endif
> > +
> > +long long
> > +test_longlong (long long x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (x));
> > +  return x;
> > +}
> > +
> > +long long
> > +test_longlong_from_mem (long long *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"GPR (*x));
> > +  return *x;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> > new file mode 100644
> > index 00000000000..fa4472ae8a8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile { target { { aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } && int128 } } } */
> > +/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
> > +
> > +#if defined (__aarch64__)
> > +# define REG "{x4}"
> > +/* { dg-final { scan-assembler-times "foo\tx4" 1 { target { aarch64*-*-* } } } } */
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define REG "{r5}"
> > +/* { dg-final { scan-assembler-times "foo\t5" 1 { target { powerpc*-*-* } } } } */
> > +#elif defined (__riscv)
> > +# define REG "{t5}"
> > +/* { dg-final { scan-assembler-times "foo\tt5" 1 { target { riscv*-*-* } } } } */
> > +#elif defined (__s390__)
> > +# define REG "{r4}"
> > +/* { dg-final { scan-assembler-times "foo\t%r4" 1 { target { s390*-*-* } } } } */
> > +#elif defined (__x86_64__)
> > +# define REG "{xmm0}"
> > +/* { dg-final { scan-assembler-times "foo\t%xmm0" 1 { target { x86_64-*-* } } } } */
> > +#endif
> > +
> > +void
> > +test (void)
> > +{
> > +  __asm__ ("foo\t%0" :: REG ((__int128) 42));
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-4.c b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> > new file mode 100644
> > index 00000000000..0816df8f719
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> > @@ -0,0 +1,50 @@
> > +/* { dg-do compile { target aarch64*-*-* arm*-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> > +
> > +#if defined (__aarch64__)
> > +# define FPR "{d5}"
> > +/* { dg-final { scan-assembler-times "foo\tv5" 4 { target { aarch64*-*-* } } } } */
> > +#elif defined (__arm__)
> > +# define FPR "{d5}"
> > +/* { dg-additional-options "-march=armv7-a+fp -mfloat-abi=hard" { target arm*-*-* } } */
> > +/* { dg-final { scan-assembler-times "foo\ts10" 4 { target { arm*-*-* } } } } */
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define FPR "{5}"
> > +/* { dg-final { scan-assembler-times "foo\t5" 4 { target { powerpc*-*-* } } } } */
> > +#elif defined (__riscv)
> > +# define FPR "{f5}"
> > +/* { dg-final { scan-assembler-times "foo\tf5" 4 { target { rsicv*-*-* } } } } */
> > +#elif defined (__s390__)
> > +# define FPR "{f5}"
> > +/* { dg-final { scan-assembler-times "foo\t%f5" 4 { target { s390*-*-* } } } } */
> > +#elif defined (__x86_64__)
> > +# define FPR "{xmm5}"
> > +/* { dg-final { scan-assembler-times "foo\t%xmm5" 4 { target { x86_64-*-* } } } } */
> > +#endif
> > +
> > +float
> > +test_float (float x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"FPR (x));
> > +  return x;
> > +}
> > +
> > +float
> > +test_float_from_mem (float *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"FPR (*x));
> > +  return *x;
> > +}
> > +
> > +double
> > +test_double (double x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"FPR (x));
> > +  return x;
> > +}
> > +
> > +double
> > +test_double_from_mem (double *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"FPR (*x));
> > +  return *x;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-5.c b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> > new file mode 100644
> > index 00000000000..a9e25ce1746
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> > @@ -0,0 +1,36 @@
> > +/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> > +
> > +typedef int V __attribute__ ((vector_size (4 * sizeof (int))));
> > +
> > +#if defined (__aarch64__)
> > +# define VR "{v20}"
> > +/* { dg-final { scan-assembler-times "foo\tv20" 2 { target { aarch64*-*-* } } } } */
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define VR "{v5}"
> > +/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
> > +#elif defined (__riscv)
> > +# define VR "{v5}"
> > +/* { dg-additional-options "-march=rv64imv" { target riscv64-*-* } } */
> > +/* { dg-final { scan-assembler-times "foo\tv5" 2 { target { riscv*-*-* } } } } */
> > +#elif defined (__s390__)
> > +# define VR "{v5}"
> > +/* { dg-require-effective-target s390_mvx { target s390*-*-* } } */
> > +/* { dg-final { scan-assembler-times "foo\t%v5" 2 { target s390*-*-* } } } */
> > +#elif defined (__x86_64__)
> > +# define VR "{xmm9}"
> > +/* { dg-final { scan-assembler-times "foo\t%xmm9" 2 { target { x86_64-*-* } } } } */
> > +#endif
> > +
> > +V
> > +test (V x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"VR (x));
> > +  return x;
> > +}
> > +
> > +V
> > +test_from_mem (V *x)
> > +{
> > +  __asm__ ("foo\t%0" : "+"VR (*x));
> > +  return *x;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-6.c b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> > new file mode 100644
> > index 00000000000..d9b7fae8097
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> > @@ -0,0 +1,60 @@
> > +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> > +/* { dg-options "-O2" } */
> > +
> > +/* Test multiple alternatives.  */
> > +
> > +#if defined (__aarch64__)
> > +# define GPR1 "{x1}"
> > +# define GPR2 "{x2}"
> > +# define GPR3 "{x3}"
> > +/* { dg-final { scan-assembler-times "foo\tx1,x3" 1 { target { aarch64*-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\tx2,\\\[x1\\\]" 1 { target { aarch64*-*-* } } } } */
> > +#elif defined (__arm__)
> > +# define GPR1 "{r1}"
> > +# define GPR2 "{r2}"
> > +# define GPR3 "{r3}"
> > +/* { dg-final { scan-assembler-times "foo\tr1,r3" 1 { target { arm*-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\tr2,\\\[r1\\\]" 1 { target { arm*-*-* } } } } */
> > +#elif defined (__i386__)
> > +# define GPR1 "{eax}"
> > +# define GPR2 "{ebx}"
> > +# define GPR3 "{ecx}"
> > +/* { dg-final { scan-assembler-times "foo\t4\\(%esp\\),%ecx" 1 { target { i?86-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%eax\\)" 1 { target { i?86-*-* } } } } */
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define GPR1 "{r4}"
> > +# define GPR2 "{r5}"
> > +# define GPR3 "{r6}"
> > +/* { dg-final { scan-assembler-times "foo\t4,6" 1 { target { powerpc*-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\t5,0\\(4\\)" 1 { target { powerpc*-*-* } } } } */
> > +#elif defined (__riscv)
> > +# define GPR1 "{t1}"
> > +# define GPR2 "{t2}"
> > +# define GPR3 "{t3}"
> > +/* { dg-final { scan-assembler-times "foo\tt1,t3" 1 { target { riscv*-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\tt2,0\\(a1\\)" 1 { target { riscv*-*-* } } } } */
> > +#elif defined (__s390__)
> > +# define GPR1 "{r0}"
> > +# define GPR2 "{r1}"
> > +# define GPR3 "{r2}"
> > +/* { dg-final { scan-assembler-times "foo\t%r0,%r2" 1 { target { s390*-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\t%r1,0\\(%r3\\)" 1 { target { s390*-*-* } } } } */
> > +#elif defined (__x86_64__)
> > +# define GPR1 "{eax}"
> > +# define GPR2 "{ebx}"
> > +# define GPR3 "{rcx}"
> > +/* { dg-final { scan-assembler-times "foo\t%eax,%rcx" 1 { target { x86_64-*-* } } } } */
> > +/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%rsi\\)" 1 { target { x86_64-*-* } } } } */
> > +#endif
> > +
> > +void
> > +test_reg_reg (int x, long long *y)
> > +{
> > +  __asm__ ("foo\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (y));
> > +}
> > +
> > +void
> > +test_reg_mem (int x, long long *y)
> > +{
> > +  __asm__ ("bar\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (*y));
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-7.c b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> > new file mode 100644
> > index 00000000000..39c4497ecaf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> > @@ -0,0 +1,70 @@
> > +/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> > +/* { dg-options "-O2" } */
> > +
> > +/* Pass parameter x in the first general argument register to the assembler
> > +   instruction.
> > +
> > +   In function bar we fail to do so because after the function call to foo,
> > +   variable argreg1 does not contain the value of x but rather 42 which got
> > +   passed to foo.  Thus, the function always returns 42.  In contrast in
> > +   function baz, variable x is saved over the function call and materializes in
> > +   the asm statement and therefore is returned.  */
> > +
> > +#if defined (__aarch64__)
> > +# define REG register int argreg1 __asm__ ("x0") = x;
> > +# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
> > +# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define REG register int argreg1 __asm__ ("r3") = x;
> > +# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
> > +# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
> > +#elif defined (__riscv)
> > +# define REG register int argreg1 __asm__ ("a0") = x;
> > +# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
> > +# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
> > +#elif defined (__s390__)
> > +# define REG register int argreg1 __asm__ ("r2") = x;
> > +# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
> > +# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
> > +#elif defined (__x86_64__)
> > +# define REG register int argreg1 __asm__ ("edi") = x;
> > +# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
> > +# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
> > +#endif
> > +
> > +__attribute__ ((noipa))
> > +int foo (int unused) { }
> > +
> > +int
> > +bar (int x)
> > +{
> > +  int out;
> > +  REG
> > +  foo (42);
> > +  MOVE1
> > +  return out;
> > +}
> > +
> > +int
> > +baz (int x)
> > +{
> > +  int out;
> > +  foo (42);
> > +  MOVE2
> > +  return out;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  if (bar (0) != 42
> > +      || bar (1) != 42
> > +      || bar (2) != 42
> > +      || bar (32) != 42
> > +      || baz (0) != 0
> > +      || baz (1) != 1
> > +      || baz (2) != 2
> > +      || baz (32) != 32)
> > +    __builtin_abort ();
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> > new file mode 100644
> > index 00000000000..6060c0946da
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> > @@ -0,0 +1,67 @@
> > +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> > +
> > +#if defined (__aarch64__)
> > +# define GPR1_RAW "x0"
> > +# define GPR2 "{x1}"
> > +# define GPR3 "{x2}"
> > +# define INVALID_GPR_A "{x31}"
> > +#elif defined (__arm__)
> > +# define GPR1_RAW "r0"
> > +# define GPR2 "{r1}"
> > +# define GPR3 "{r2}"
> > +# define INVALID_GPR_A "{r16}"
> > +#elif defined (__i386__)
> > +# define GPR1_RAW "%eax"
> > +# define GPR2 "{%ebx}"
> > +# define GPR3 "{%edx}"
> > +# define INVALID_GPR_A "{%eex}"
> > +#elif defined (__powerpc__) || defined (__POWERPC__)
> > +# define GPR1_RAW "r4"
> > +# define GPR2 "{r5}"
> > +# define GPR3 "{r6}"
> > +# define INVALID_GPR_A "{r33}"
> > +#elif defined (__riscv)
> > +# define GPR1_RAW "t4"
> > +# define GPR2 "{t5}"
> > +# define GPR3 "{t6}"
> > +# define INVALID_GPR_A "{t7}"
> > +#elif defined (__s390__)
> > +# define GPR1_RAW "r4"
> > +# define GPR2 "{r5}"
> > +# define GPR3 "{r6}"
> > +# define INVALID_GPR_A "{r17}"
> > +#elif defined (__x86_64__)
> > +# define GPR1_RAW "rax"
> > +# define GPR2 "{rbx}"
> > +# define GPR3 "{rcx}"
> > +# define INVALID_GPR_A "{rex}"
> > +#endif
> > +
> > +#define GPR1 "{"GPR1_RAW"}"
> > +#define INVALID_GPR_B "{"GPR1_RAW
> > +
> > +struct { int a[128]; } s = {0};
> > +
> > +void
> > +test (void)
> > +{
> > +  int x, y;
> > +  register int gpr1 __asm__ (GPR1_RAW) = 0;
> > +
> > +  __asm__ ("" :: "{}" (42)); /* { dg-error "invalid input constraint: \{\}" } */
> > +  __asm__ ("" :: INVALID_GPR_A (42)); /* { dg-error "invalid input constraint" } */
> > +  __asm__ ("" :: INVALID_GPR_B (42)); /* { dg-error "invalid input constraint" } */
> > +
> > +  __asm__ ("" :: GPR1 (s)); /* { dg-error "data type isn't suitable for register .* of operand 0" } */
> > +
> > +  __asm__ ("" :: "r" (gpr1), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" :: GPR1 (42), "r" (gpr1)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" :: GPR1 (42), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" :: GPR1","GPR2 (42), GPR2","GPR3 (42));
> > +  __asm__ ("" :: GPR1","GPR2 (42), GPR3","GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" :: GPR1","GPR2 (42), GPR1","GPR3 (42)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" :: GPR1 GPR2 (42), GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" : "+"GPR1 (x), "="GPR1 (y)); /* { dg-error "multiple outputs to hard register" } */
> > +  __asm__ ("" : "="GPR1 (y) : GPR1 (42), "0" (42)); /* { dg-error "multiple inputs to hard register" } */
> > +  __asm__ ("" : "+"GPR1 (x) : GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> > new file mode 100644
> > index 00000000000..efa843e0800
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile { target { { aarch64*-*-* s390x-*-* } && int128 } } } */
> > +/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
> > +
> > +/* Test register pairs.  */
> > +
> > +#if defined (__aarch64__)
> > +# define GPR "{x4}"
> > +# define INVALID_GPR "{x5}"
> > +#elif defined (__s390__)
> > +# define GPR "{r4}"
> > +# define INVALID_GPR "{r5}"
> > +#endif
> > +
> > +void
> > +test (void)
> > +{
> > +  __asm__ ("" :: GPR ((__int128) 42));
> > +  __asm__ ("" :: INVALID_GPR ((__int128) 42)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> > new file mode 100644
> > index 00000000000..ef8af5a6d52
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> > @@ -0,0 +1,20 @@
> > +/* { dg-do compile { target arm*-*-* s390-*-* } } */
> > +/* { dg-options "-std=c99" } we need long long */
> > +/* { dg-additional-options "-march=armv8-a" { target arm*-*-* } } */
> > +
> > +/* Test register pairs.  */
> > +
> > +#if defined (__arm__)
> > +# define GPR "{r4}"
> > +# define INVALID_GPR "{r5}"
> > +#elif defined (__s390__)
> > +# define GPR "{r4}"
> > +# define INVALID_GPR "{r5}"
> > +#endif
> > +
> > +void
> > +test (void)
> > +{
> > +  __asm__ ("" :: GPR (42ll));
> > +  __asm__ ("" :: INVALID_GPR (42ll)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/pr87600-2.c b/gcc/testsuite/gcc.dg/pr87600-2.c
> > index e8a9f194b73..860d3f965ef 100644
> > --- a/gcc/testsuite/gcc.dg/pr87600-2.c
> > +++ b/gcc/testsuite/gcc.dg/pr87600-2.c
> > @@ -11,34 +11,6 @@ test0 (void)
> >   {
> >     register long var1 asm (REG1);
> >     register long var2 asm (REG1);
> > -  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "invalid hard register usage between output operands" } */
> > +  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "multiple outputs to hard register" } */
> >     return var1;
> >   }
> > -
> > -long
> > -test1 (void)
> > -{
> > -  register long var1 asm (REG1);
> > -  register long var2 asm (REG2);
> > -  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
> > -  return var1;
> > -}
> > -
> > -long
> > -test2 (void)
> > -{
> > -  register long var1 asm (REG1);
> > -  register long var2 asm (REG1);
> > -  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> > -  return var1;
> > -}
> > -
> > -long
> > -test3 (void)
> > -{
> > -  register long var1 asm (REG1);
> > -  register long var2 asm (REG1);
> > -  long var3;
> > -  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> > -  return var1 + var3;
> > -}
> > diff --git a/gcc/testsuite/gcc.dg/pr87600-3.c b/gcc/testsuite/gcc.dg/pr87600-3.c
> > new file mode 100644
> > index 00000000000..2673d004130
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr87600-3.c
> > @@ -0,0 +1,35 @@
> > +/* PR rtl-optimization/87600  */
> > +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* s390*-*-* x86_64-*-* } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include "pr87600.h"
> > +
> > +/* The following are all invalid uses of local register variables.  */
> > +
> > +long
> > +test1 (void)
> > +{
> > +  register long var1 asm (REG1);
> > +  register long var2 asm (REG2);
> > +  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
> > +  return var1;
> > +}
> > +
> > +long
> > +test2 (void)
> > +{
> > +  register long var1 asm (REG1);
> > +  register long var2 asm (REG1);
> > +  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> > +  return var1;
> > +}
> > +
> > +long
> > +test3 (void)
> > +{
> > +  register long var1 asm (REG1);
> > +  register long var2 asm (REG1);
> > +  long var3;
> > +  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> > +  return var1 + var3;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> > new file mode 100644
> > index 00000000000..671c0ede6ef
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> > @@ -0,0 +1,103 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O2 -march=z13 -mzarch" } */
> > +/* { dg-final { check-function-bodies "**" "" "" } } */
> > +
> > +/*
> > +** test_in_1:
> > +**     foo	%r2
> > +**     br	%r14
> > +*/
> > +
> > +int
> > +test_in_1 (int x)
> > +{
> > +  asm ("foo	%0" :: "{r2}" (x));
> > +  return x;
> > +}
> > +
> > +/*
> > +** test_in_2:
> > +**     lgr	(%r[0-9]+),%r2
> > +**     lr	%r2,%r3
> > +**     foo	%r2
> > +**     lgr	%r2,\1
> > +**     br	%r14
> > +*/
> > +
> > +int
> > +test_in_2 (int x, int y)
> > +{
> > +  asm ("foo	%0" :: "{r2}" (y));
> > +  return x;
> > +}
> > +
> > +/*
> > +** test_in_3:
> > +**     stmg	%r12,%r15,96\(%r15\)
> > +**     lay	%r15,-160\(%r15\)
> > +**     lgr	(%r[0-9]+),%r2
> > +**     ahi	%r2,1
> > +**     lgfr	%r2,%r2
> > +**     brasl	%r14,foo@PLT
> > +**     lr	%r3,%r2
> > +**     lr	%r2,\1
> > +**     foo	%r3,%r2
> > +**     lgr	%r2,\1
> > +**     lmg	%r12,%r15,256\(%r15\)
> > +**     br	%r14
> > +*/
> > +
> > +extern int foo (int);
> > +
> > +int
> > +test_in_3 (int x)
> > +{
> > +  asm ("foo	%0,%1\n" :: "{r3}" (foo (x + 1)), "{r2}" (x));
> > +  return x;
> > +}
> > +
> > +/*
> > +** test_out_1:
> > +**     foo	%r3
> > +**     lgfr	%r2,%r3
> > +**     br	%r14
> > +*/
> > +
> > +int
> > +test_out_1 (void)
> > +{
> > +  int x;
> > +  asm ("foo	%0" : "={r3}" (x));
> > +  return x;
> > +}
> > +
> > +/*
> > +** test_out_2:
> > +**     lgr	(%r[0-9]+),%r2
> > +**     foo	%r2
> > +**     ark	(%r[0-9]+),\1,%r2
> > +**     lgfr	%r2,\2
> > +**     br	%r14
> > +*/
> > +
> > +int
> > +test_out_2 (int x)
> > +{
> > +  int y;
> > +  asm ("foo	%0" : "={r2}" (y));
> > +  return x + y;
> > +}
> > +
> > +/*
> > +** test_inout_1:
> > +**     foo	%r2
> > +**     lgfr	%r2,%r2
> > +**     br	%r14
> > +*/
> > +
> > +int
> > +test_inout_1 (int x)
> > +{
> > +  asm ("foo	%0" : "+{r2}" (x));
> > +  return x;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> > new file mode 100644
> > index 00000000000..a892fe8f0aa
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> > @@ -0,0 +1,43 @@
> > +/* { dg-do compile { target { lp64 } } } */
> > +/* { dg-options "-O2 -march=z13 -mzarch" } */
> > +/* { dg-final { check-function-bodies "**" "" "" } } */
> > +/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1078523331\n} } } */
> > +
> > +
> > +/*
> > +** test_float_into_gpr:
> > +**     lrl	%r4,.LC0
> > +**     foo	%r4
> > +**     br	%r14
> > +*/
> > +
> > +void
> > +test_float_into_gpr (void)
> > +{
> > +  // This is the counterpart to
> > +  //   register float x asm ("r4") = 3.14f;
> > +  //   asm ("foo	%0" :: "r" (x));
> > +  // where the bit-pattern of 3.14f is loaded into GPR.
> > +  asm ("foo	%0" :: "{r4}" (3.14f));
> > +}
> > +
> > +/*
> > +** test_float:
> > +** (
> > +**     ldr	%f4,%f0
> > +**     ldr	%f5,%f2
> > +** |
> > +**     ldr	%f5,%f2
> > +**     ldr	%f4,%f0
> > +** )
> > +**     aebr	%f5,%f4
> > +**     ldr	%f0,%f5
> > +**     br	%r14
> > +*/
> > +
> > +float
> > +test_float (float x, float y)
> > +{
> > +  asm ("aebr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
> > +  return y;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> > new file mode 100644
> > index 00000000000..5df37b5b717
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> > @@ -0,0 +1,42 @@
> > +/* { dg-do compile { target lp64 } } */
> > +/* { dg-options "-O2 -march=z13 -mzarch" } */
> > +/* { dg-final { check-function-bodies "**" "" "" } } */
> > +/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1074339512\n\t\.long\t1374389535\n} } } */
> > +
> > +/*
> > +** test_double_into_gpr:
> > +**     lgrl	%r4,.LC0
> > +**     foo	%r4
> > +**     br	%r14
> > +*/
> > +
> > +void
> > +test_double_into_gpr (void)
> > +{
> > +  // This is the counterpart to
> > +  //   register double x asm ("r4") = 3.14;
> > +  //   asm ("foo	%0" :: "r" (x));
> > +  // where the bit-pattern of 3.14 is loaded into GPR.
> > +  asm ("foo	%0" :: "{r4}" (3.14));
> > +}
> > +
> > +/*
> > +** test_double:
> > +** (
> > +**     ldr	%f4,%f0
> > +**     ldr	%f5,%f2
> > +** |
> > +**     ldr	%f5,%f2
> > +**     ldr	%f4,%f0
> > +** )
> > +**     adbr	%f5,%f4
> > +**     ldr	%f0,%f5
> > +**     br	%r14
> > +*/
> > +
> > +double
> > +test_double (double x, double y)
> > +{
> > +  asm ("adbr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
> > +  return y;
> > +}
> > diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
> > index 42c719c512c..3c28ccec955 100644
> > --- a/gcc/testsuite/lib/scanasm.exp
> > +++ b/gcc/testsuite/lib/scanasm.exp
> > @@ -896,6 +896,10 @@ proc configure_check-function-bodies { config } {
> >   	set up_config(fluff) {^\s*(?://)}
> >       } elseif { [istarget *-*-darwin*] } {
> >   	set up_config(fluff) {^\s*(?:\.|//|@)|^L[0-9ABCESV]}
> > +    } elseif { [istarget s390*-*-*] } {
> > +	# In addition to the defaults, skip lines beginning with a '#', which
> > +	# result from inline asm.
> > +	set up_config(fluff) {^\s*(?:\.|//|@|$|#)}
> >       } else {
> >   	# Skip lines beginning with labels ('.L[...]:') or other directives
> >   	# ('.align', '.cfi_startproc', '.quad [...]', '.text', etc.), '//' or
Georg-Johann Lay Aug. 5, 2024, 3:28 p.m. UTC | #3
Am 05.08.24 um 15:59 schrieb Stefan Schulze Frielinghaus:
> On Mon, Aug 05, 2024 at 02:19:50PM +0200, Georg-Johann Lay wrote:
>> Am 05.08.24 um 12:28 schrieb Stefan Schulze Frielinghaus:
>>> This is rather unfortunate but I couldn't find a way how to validate
>>> register names during genoutput.  If no one else has an idea I will
>>> replace gcc_assert with a more expressive error message.
>>
>> [ADDITIONAL_]REGISTER_NAMES isn't available?  Though using that might
>> bypass the effect of target hooks like TARGET_CONDITIONAL_REGISTER_USAGE.
> 
> REGISTER_NAMES sometimes references target variables (see, e.g., rs6000)
> which aren't linked into genoutput and are therefore unavailable.
> 
>> But there are also cases with an asm operand print modifier; you cannot
>> check that, it's checked by TARGET_PRINT_OPERAND etc. which get a
>> hard register and not a string for a register name.
>>
>> Maybe genoutput could add additional information to insn-output.cc or
>> whatever, and the compiler proper checks that and emits diagnostics
>> as needed?
> 
> Though, this would be a run-time check, right?  I was actually hoping
> for a "compile-time" check, i.e., something which errors while compiling
> GCC and not when GCC is executed.  The latter is already implemented.

Yes, it would be a run-time check.  As compiler options may be involved,
that's perhaps the only way.

Though such a test would always run, independent of the code being
compiled, so any problem would pop up immediately, e.g. in a self-test.
Hence it would not be some nasty ICE that only triggers with specific
code in user land.  The runtime overhead would be negligible.

Johann
Georg-Johann Lay Aug. 9, 2024, 6:21 p.m. UTC | #4
Am 05.08.24 um 12:28 schrieb Stefan Schulze Frielinghaus:
> This is a follow-up of
> https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654013.html

Hi Stefan,

as an addition, maybe a built-in macro is useful that tells the target
code whether the feature is available.

Similar to __GXX_CONSTEXPR_ASM__ for asm constexprs.

Johann

> What has changed?
> 
> - Rebased and fixed an issue in constrain_operands which manifested
> after late-combine.
> 
> - Introduced new test cases for Arm, Intel, POWER, RISCV, S/390 for 32-
> and 64-bit where appropriate (including register pairs etc.).  Test
> gcc.dg/asm-hard-reg-7.c is a bit controversial since I'm testing for an
> anti feature here, i.e., I'm testing for register asm in conjunction
> with calls.  I'm fine with removing it in the end but I wanted to keep
> it in for demonstration purposes at least during discussion of this
> patch.
> 
> - Split test pr87600-2.c into pr87600-2.c and pr87600-3.c since test0
> errors out early, now.  Otherwise, the remaining errors would not be
> reported.  Besides that, the error message has slightly changed.
> 
> - Modified genoutput.cc in order to allow hard register constraints in
> machine descriptions.  For example, on s390 the instruction mvcrl makes
> use of the implicit register r0 which we currently deal with as follows:
> 
> (define_insn "*mvcrl"
>    [(set (match_operand:BLK 0 "memory_operand" "=Q")
>         (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
>                      (reg:SI GPR0_REGNUM)]
>                     UNSPEC_MVCRL))]
>    "TARGET_Z15"
>    "mvcrl\t%0,%1"
>    [(set_attr "op_type" "SSE")])
> 
> (define_expand "mvcrl"
>    [(set (reg:SI GPR0_REGNUM) (match_operand:SI 2 "general_operand"))
>     (set (match_operand:BLK 0 "memory_operand" "=Q")
>         (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
>                      (reg:SI GPR0_REGNUM)]
>                     UNSPEC_MVCRL))]
>    "TARGET_Z15"
>    "")
> 
> In the expander we ensure that GPR0 is set up correctly.  With this patch
> we could simply write
> 
> (define_insn "mvcrl"
>    [(set (match_operand:BLK 0 "memory_operand" "=Q")
>          (unspec:BLK [(match_operand:BLK 1 "memory_operand" "Q")
>                       (match_operand:SI 2 "general_operand" "{r0}")]
>                      UNSPEC_MVCRL))]
>    "TARGET_Z15"
>    "mvcrl\t%0,%1"
>    [(set_attr "op_type" "SSE")])
> 
> What I dislike is that I didn't find a way to verify hard register names
> during genoutput, i.e., ensuring that the name is valid after all.  This
> is due to the way reg_names is defined, which cannot be accessed by
> genoutput.  The same holds true for REGISTER_NAMES et al. which may
> reference some target specific variable (see e.g. POWER).  Thus, in case
> of an invalid register name in a machine description file we do not
> end up with a genoutput-time error but instead fail at run-time in
> process_alt_operands():
> 
>     case '{':
>         {
>           int regno = parse_constraint_regname (p);
>           gcc_assert (regno >= 0);
>           cl = REGNO_REG_CLASS (regno);
>           CLEAR_HARD_REG_SET (hregset);
>           SET_HARD_REG_BIT (hregset, regno);
>           cl_filter = &hregset;
>           goto reg;
>         }
> 
> This is rather unfortunate but I couldn't find a way to validate
> register names during genoutput.  If no one else has an idea I will
> replace gcc_assert with a more expressive error message.
> 
> What's next?
> 
> I was thinking about replacing register asm with the new hard register
> constraint.  This would solve problems like demonstrated by
> gcc.dg/asm-hard-reg-7.c.  For example, we could replace the constraint
> 
>     register int x asm ("r5") = 42;
>     asm ("foo   %0" :: "r" (x));
> 
> with
> 
>     register int x asm ("r5") = 42;
>     asm ("foo   %0" :: "{r5}" (x));
> 
> and ignore any further effect of the register asm.  However, I haven't
> really thought this through and there are certainly cases which are
> currently allowed which cannot trivially be converted as e.g. here:
> 
>     register int x asm ("r5") = 42;
>     asm ("foo   %0" :: "rd" (x));
> 
> Multiple alternatives are kind of strange in combination with register
> asm.  For example, on s390 the two constraints "r" and "d" restrict both
> to GPRs.  That is not a show stopper but certainly something which needs
> some consideration.  If you can think of some wild combinations/edge
> cases I would be happy to hear about them.  Anyhow, this is something for a
> further patch.
> 
> Last but not least, if there is enough consent to accept this feature, I
> will start writing up some documentation.
> 
> Bootstrapped and regtested on Arm, Intel, POWER, RISCV, S/390.  I have
> only verified the 32-bit tests via cross compilers and didn't execute
> them in contrast to 64-bit targets.
> ---
>   gcc/cfgexpand.cc                              |  42 -----
>   gcc/genoutput.cc                              |  12 ++
>   gcc/genpreds.cc                               |   4 +-
>   gcc/gimplify.cc                               | 134 ++++++++++++++-
>   gcc/lra-constraints.cc                        |  13 ++
>   gcc/recog.cc                                  |  11 +-
>   gcc/stmt.cc                                   | 155 +++++++++++++++++-
>   gcc/stmt.h                                    |  12 +-
>   gcc/testsuite/gcc.dg/asm-hard-reg-1.c         |  85 ++++++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-2.c         |  33 ++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-3.c         |  25 +++
>   gcc/testsuite/gcc.dg/asm-hard-reg-4.c         |  50 ++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-5.c         |  36 ++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-6.c         |  60 +++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-7.c         |  70 ++++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c   |  67 ++++++++
>   gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c   |  19 +++
>   gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c   |  20 +++
>   gcc/testsuite/gcc.dg/pr87600-2.c              |  30 +---
>   gcc/testsuite/gcc.dg/pr87600-3.c              |  35 ++++
>   .../gcc.target/s390/asm-hard-reg-1.c          | 103 ++++++++++++
>   .../gcc.target/s390/asm-hard-reg-2.c          |  43 +++++
>   .../gcc.target/s390/asm-hard-reg-3.c          |  42 +++++
>   gcc/testsuite/lib/scanasm.exp                 |   4 +
>   24 files changed, 1020 insertions(+), 85 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-1.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-2.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-3.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-4.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-5.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-6.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-7.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
>   create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
>   create mode 100644 gcc/testsuite/gcc.dg/pr87600-3.c
>   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
>   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
>   create mode 100644 gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> 
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index dad3ae1b7c6..8bdc530626c 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -2966,44 +2966,6 @@ expand_asm_loc (tree string, int vol, location_t locus)
>     emit_insn (body);
>   }
>   
> -/* Return the number of times character C occurs in string S.  */
> -static int
> -n_occurrences (int c, const char *s)
> -{
> -  int n = 0;
> -  while (*s)
> -    n += (*s++ == c);
> -  return n;
> -}
> -
> -/* A subroutine of expand_asm_operands.  Check that all operands have
> -   the same number of alternatives.  Return true if so.  */
> -
> -static bool
> -check_operand_nalternatives (const vec<const char *> &constraints)
> -{
> -  unsigned len = constraints.length();
> -  if (len > 0)
> -    {
> -      int nalternatives = n_occurrences (',', constraints[0]);
> -
> -      if (nalternatives + 1 > MAX_RECOG_ALTERNATIVES)
> -	{
> -	  error ("too many alternatives in %<asm%>");
> -	  return false;
> -	}
> -
> -      for (unsigned i = 1; i < len; ++i)
> -	if (n_occurrences (',', constraints[i]) != nalternatives)
> -	  {
> -	    error ("operand constraints for %<asm%> differ "
> -		   "in number of alternatives");
> -	    return false;
> -	  }
> -    }
> -  return true;
> -}
> -
>   /* Check for overlap between registers marked in CLOBBERED_REGS and
>      anything inappropriate in T.  Emit error and return the register
>      variable definition for error, NULL_TREE for ok.  */
> @@ -3169,10 +3131,6 @@ expand_asm_stmt (gasm *stmt)
>   	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
>       }
>   
> -  /* ??? Diagnose during gimplification?  */
> -  if (! check_operand_nalternatives (constraints))
> -    return;
> -
>     /* Count the number of meaningful clobbered registers, ignoring what
>        we would ignore later.  */
>     auto_vec<rtx> clobber_rvec;
> diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
> index efd81766bb5..c1efb043579 100644
> --- a/gcc/genoutput.cc
> +++ b/gcc/genoutput.cc
> @@ -1219,6 +1219,18 @@ mdep_constraint_len (const char *s, file_location loc, int opno)
>         if (!strncmp (s, p->name, p->namelen))
>   	return p->namelen;
>   
> +  if (*s == '{')
> +    {
> +      const char *end = s + 1;
> +      while (*end != '}' && *end != '"' && *end != '\0')
> +	++end;
> +      /* Similarly as in parse_constraint_regname(), consider any hard register
> +	 name longer than a few characters as an error.  */
> +      ptrdiff_t len = end - s;
> +      if (*end == '}' && len > 1 && len < 31)
> +	return len + 1;
> +    }
> +
>     error_at (loc, "error: undefined machine-specific constraint "
>   	    "at this point: \"%s\"", s);
>     message_at (loc, "note:  in operand %d", opno);
> diff --git a/gcc/genpreds.cc b/gcc/genpreds.cc
> index 55d149e8a40..0777cb7a4db 100644
> --- a/gcc/genpreds.cc
> +++ b/gcc/genpreds.cc
> @@ -1148,7 +1148,7 @@ write_insn_constraint_len (void)
>     unsigned int i;
>   
>     puts ("static inline size_t\n"
> -	"insn_constraint_len (char fc, const char *str ATTRIBUTE_UNUSED)\n"
> +	"insn_constraint_len (char fc, const char *str)\n"
>   	"{\n"
>   	"  switch (fc)\n"
>   	"    {");
> @@ -1181,6 +1181,8 @@ write_insn_constraint_len (void)
>   
>     puts ("    default: break;\n"
>   	"    }\n"
> +	"  if (str[0] == '{')\n"
> +	"      return ((const char *) rawmemchr (str + 1, '}') - str) + 1;\n"
>   	"  return 1;\n"
>   	"}\n");
>   }
> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index ab323d764e8..abb705d49ac 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -70,6 +70,9 @@ along with GCC; see the file COPYING3.  If not see
>   #include "omp-offload.h"
>   #include "context.h"
>   #include "tree-nested.h"
> +#include "insn-config.h"
> +#include "recog.h"
> +#include "output.h"
>   
>   /* Identifier for a basic condition, mapping it to other basic conditions of
>      its Boolean expression.  Basic conditions given the same uid (in the same
> @@ -6993,6 +6996,42 @@ gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>     return ret;
>   }
>   
> +/* Return the number of times character C occurs in string S.  */
> +
> +static int
> +num_occurrences (int c, const char *s)
> +{
> +  int n = 0;
> +  while (*s)
> +    n += (*s++ == c);
> +  return n;
> +}
> +
> +/* A subroutine of gimplify_asm_expr.  Check that all operands have
> +   the same number of alternatives.  Return -1 if this is violated.  Otherwise
> +   return the number of alternatives.  */
> +
> +static int
> +num_alternatives (const_tree link)
> +{
> +  if (link == nullptr)
> +    return 0;
> +
> +  const char *constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> +  int num = num_occurrences (',', constraint);
> +
> +  if (num + 1 > MAX_RECOG_ALTERNATIVES)
> +    return -1;
> +
> +  for (link = TREE_CHAIN (link); link; link = TREE_CHAIN (link))
> +    {
> +      constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> +      if (num_occurrences (',', constraint) != num)
> +	return -1;
> +    }
> +  return num + 1;
> +}
> +
>   /* Gimplify the operands of an ASM_EXPR.  Input operands should be a gimple
>      value; output operands should be a gimple lvalue.  */
>   
> @@ -7023,6 +7062,36 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>     clobbers = NULL;
>     labels = NULL;
>   
> +  int num_alternatives_out = num_alternatives (ASM_OUTPUTS (expr));
> +  int num_alternatives_in = num_alternatives (ASM_INPUTS (expr));
> +  if (num_alternatives_out == -1 || num_alternatives_in == -1
> +      || (num_alternatives_out > 0 && num_alternatives_in > 0
> +	  && num_alternatives_out != num_alternatives_in))
> +    {
> +      error ("operand constraints for %<asm%> differ "
> +	     "in number of alternatives");
> +      return GS_ERROR;
> +    }
> +  int num_alternatives = MAX (num_alternatives_out, num_alternatives_in);
> +
> +  /* Regarding hard register constraints ensure that each hard register is used
> +     at most once over all inputs/outputs and each alternative.  Keep track in
> +     hardregs[0] which hard register is used via an asm register over all
> +     inputs/outputs.  hardregs[i] for i >= 2 describes which hard registers are
> +     used for alternative i-2 over all inputs/outputs.  hardregs[1] is a
> +     reduction of all alternatives, i.e., hardregs[1] |= hardregs[i] for i >= 2
> +     and describes whether a hard register is used in any alternative.  This is
> +     just a shortcut instead of recomputing the union over all alternatives;
> +     possibly multiple times.  */
> +  auto_vec<HARD_REG_SET> hardregs (num_alternatives + 2);
> +  std::pair <vec <HARD_REG_SET> *, machine_mode> hardreg_props = {&hardregs, VOIDmode};
> +  for (int i = 0; i < num_alternatives + 2; ++i)
> +    {
> +      HARD_REG_SET hregset;
> +      CLEAR_HARD_REG_SET (hregset);
> +      hardregs.quick_push (hregset);
> +    }
> +
>     ret = GS_ALL_DONE;
>     link_next = NULL_TREE;
>     for (i = 0, link = ASM_OUTPUTS (expr); link; ++i, link = link_next)
> @@ -7039,8 +7108,13 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>         if (constraint_len == 0)
>           continue;
>   
> -      ok = parse_output_constraint (&constraint, i, 0, 0,
> -				    &allows_mem, &allows_reg, &is_inout);
> +      tree outtype = TREE_TYPE (TREE_VALUE (link));
> +      auto hardreg_props_p
> +	= outtype != error_mark_node
> +	? (hardreg_props.second = TYPE_MODE (outtype), &hardreg_props)
> +	: nullptr;
> +      ok = parse_output_constraint (&constraint, i, 0, 0, &allows_mem,
> +				    &allows_reg, &is_inout, hardreg_props_p);
>         if (!ok)
>   	{
>   	  ret = GS_ERROR;
> @@ -7049,7 +7123,6 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   
>         /* If we can't make copies, we can only accept memory.
>   	 Similarly for VLAs.  */
> -      tree outtype = TREE_TYPE (TREE_VALUE (link));
>         if (outtype != error_mark_node
>   	  && (TREE_ADDRESSABLE (outtype)
>   	      || !COMPLETE_TYPE_P (outtype)
> @@ -7111,6 +7184,24 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   	      TREE_VALUE (link) = tem;
>   	      tret = GS_OK;
>   	    }
> +	  if (VAR_P (op) && DECL_HARD_REGISTER (op))
> +	    {
> +	      tree id = DECL_ASSEMBLER_NAME (op);
> +	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
> +	      int hardreg = decode_reg_name (asmspec);
> +	      if (hardreg >= 0)
> +		{
> +		  if (TEST_HARD_REG_BIT (hardregs[0], hardreg)
> +		      || TEST_HARD_REG_BIT (hardregs[1], hardreg))
> +		    {
> +		      error ("multiple outputs to hard register: %s",
> +			     reg_names[hardreg]);
> +		      return GS_ERROR;
> +		    }
> +		  else
> +		    SET_HARD_REG_BIT (hardregs[0], hardreg);
> +		}
> +	    }
>   	}
>   
>         vec_safe_push (outputs, link);
> @@ -7210,16 +7301,29 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   	}
>       }
>   
> +  for (unsigned int i = 0; i < hardregs.length (); ++i)
> +    CLEAR_HARD_REG_SET (hardregs[i]);
> +
>     link_next = NULL_TREE;
>     for (link = ASM_INPUTS (expr); link; ++i, link = link_next)
>       {
>         link_next = TREE_CHAIN (link);
>         constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> -      parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> -			      oconstraints, &allows_mem, &allows_reg);
> +      tree intype = TREE_TYPE (TREE_VALUE (link));
> +      auto hardreg_props_p
> +	= intype != error_mark_node
> +	? (hardreg_props.second = TYPE_MODE (intype), &hardreg_props)
> +	: nullptr;
> +      bool ok = parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> +					oconstraints, &allows_mem, &allows_reg,
> +					hardreg_props_p);
> +      if (!ok)
> +	{
> +	  ret = GS_ERROR;
> +	  is_inout = false;
> +	}
>   
>         /* If we can't make copies, we can only accept memory.  */
> -      tree intype = TREE_TYPE (TREE_VALUE (link));
>         if (intype != error_mark_node
>   	  && (TREE_ADDRESSABLE (intype)
>   	      || !COMPLETE_TYPE_P (intype)
> @@ -7290,6 +7394,24 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
>   				is_gimple_asm_val, fb_rvalue);
>   	  if (tret == GS_ERROR)
>   	    ret = tret;
> +	  tree inputv = TREE_VALUE (link);
> +	  if (VAR_P (inputv) && DECL_HARD_REGISTER (inputv))
> +	    {
> +	      tree id = DECL_ASSEMBLER_NAME (inputv);
> +	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
> +	      int hardreg = decode_reg_name (asmspec);
> +	      if (hardreg >= 0)
> +		{
> +		  if (TEST_HARD_REG_BIT (hardregs[1], hardreg))
> +		    {
> +		      error ("multiple inputs to hard register: %s",
> +			     reg_names[hardreg]);
> +		      return GS_ERROR;
> +		    }
> +		  else
> +		    SET_HARD_REG_BIT (hardregs[0], hardreg);
> +		}
> +	    }
>   	}
>   
>         TREE_CHAIN (link) = NULL_TREE;
> diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
> index 92b343fa99a..632c75ef01c 100644
> --- a/gcc/lra-constraints.cc
> +++ b/gcc/lra-constraints.cc
> @@ -114,6 +114,7 @@
>   #include "target.h"
>   #include "rtl.h"
>   #include "tree.h"
> +#include "stmt.h"
>   #include "predict.h"
>   #include "df.h"
>   #include "memmodel.h"
> @@ -2165,6 +2166,7 @@ process_alt_operands (int only_alternative)
>     bool costly_p;
>     enum reg_class cl;
>     const HARD_REG_SET *cl_filter;
> +  HARD_REG_SET hregset;
>   
>     /* Calculate some data common for all alternatives to speed up the
>        function.	*/
> @@ -2536,6 +2538,17 @@ process_alt_operands (int only_alternative)
>   		  cl_filter = nullptr;
>   		  goto reg;
>   
> +		case '{':
> +		    {
> +		      int regno = decode_hreg_constraint (p);
> +		      gcc_assert (regno >= 0);
> +		      cl = REGNO_REG_CLASS (regno);
> +		      CLEAR_HARD_REG_SET (hregset);
> +		      SET_HARD_REG_BIT (hregset, regno);
> +		      cl_filter = &hregset;
> +		      goto reg;
> +		    }
> +
>   		default:
>   		  cn = lookup_constraint (p);
>   		  switch (get_constraint_type (cn))
> diff --git a/gcc/recog.cc b/gcc/recog.cc
> index 54b317126c2..b604029d5f1 100644
> --- a/gcc/recog.cc
> +++ b/gcc/recog.cc
> @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "target.h"
>   #include "rtl.h"
>   #include "tree.h"
> +#include "stmt.h"
>   #include "cfghooks.h"
>   #include "df.h"
>   #include "memmodel.h"
> @@ -2333,7 +2334,8 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints)
>   	    {
>   	    case CT_REGISTER:
>   	      if (!result
> -		  && reg_class_for_constraint (cn) != NO_REGS
> +		  && (reg_class_for_constraint (cn) != NO_REGS
> +		      || constraint[0] == '{')
>   		  && GET_MODE (op) != BLKmode
>   		  && register_operand (op, VOIDmode))
>   		result = 1;
> @@ -3267,6 +3269,13 @@ constrain_operands (int strict, alternative_mask alternatives)
>   		  win = true;
>   		break;
>   
> +	      case '{':
> +		if ((REG_P (op) && HARD_REGISTER_P (op)
> +		     && (int) REGNO (op) == decode_hreg_constraint (p))
> +		    || !reload_completed)
> +		  win = true;
> +		break;
> +
>   	      default:
>   		{
>   		  enum constraint_num cn = lookup_constraint (p);
> diff --git a/gcc/stmt.cc b/gcc/stmt.cc
> index ae1527f0a19..7b073f8ce85 100644
> --- a/gcc/stmt.cc
> +++ b/gcc/stmt.cc
> @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
>   #include "emit-rtl.h"
>   #include "pretty-print.h"
>   #include "diagnostic-core.h"
> +#include "output.h"
>   
>   #include "fold-const.h"
>   #include "varasm.h"
> @@ -174,6 +175,77 @@ expand_label (tree label)
>       maybe_set_first_label_num (label_r);
>   }
>   
> +/* Parse a hard register constraint and return its number or -1 in case of an
> +   error.  BEGIN should point to a string of the form "{regname}".  For the
> +   sake of simplicity assume that a register name is shorter than 31
> +   characters; otherwise return -1.  */
> +
> +int
> +decode_hreg_constraint (const char *begin)
> +{
> +  if (*begin != '{')
> +    return -1;
> +  ++begin;
> +  const char *end = begin;
> +  while (*end != '}' && *end != '\0')
> +    ++end;
> +  if (*end != '}' || end == begin)
> +    return -1;
> +  ptrdiff_t len = end - begin;
> +  if (len >= 31)
> +    return -1;
> +  char regname[32];
> +  memcpy (regname, begin, len);
> +  regname[len] = '\0';
> +  int regno = decode_reg_name (regname);
> +  return regno;
> +}
> +
> +static bool
> +eliminable_regno_p (int regnum)
> +{
> +  static const struct
> +  {
> +    const int from;
> +    const int to;
> +  } eliminables[] = ELIMINABLE_REGS;
> +  for (size_t i = 0; i < ARRAY_SIZE (eliminables); i++)
> +    if (regnum == eliminables[i].from)
> +      return true;
> +  return false;
> +}
> +
> +/* Perform a similar check as done in make_decl_rtl().  */
> +
> +static bool
> +hardreg_ok_p (int reg_number, machine_mode mode, int operand_num)
> +{
> +  if (mode == BLKmode)
> +    error ("data type isn%'t suitable for register %s of operand %i",
> +	   reg_names[reg_number], operand_num);
> +  else if (!in_hard_reg_set_p (accessible_reg_set, mode, reg_number))
> +    error ("register %s for operand %i cannot be accessed"
> +	   " by the current target", reg_names[reg_number], operand_num);
> +  else if (!in_hard_reg_set_p (operand_reg_set, mode, reg_number))
> +    error ("register %s for operand %i is not general enough"
> +	   " to be used as a register variable", reg_names[reg_number], operand_num);
> +  else if (!targetm.hard_regno_mode_ok (reg_number, mode))
> +    error ("register %s for operand %i isn%'t suitable for data type",
> +	   reg_names[reg_number], operand_num);
> +  else if (reg_number != HARD_FRAME_POINTER_REGNUM
> +	   && (reg_number == FRAME_POINTER_REGNUM
> +#ifdef RETURN_ADDRESS_POINTER_REGNUM
> +	       || reg_number == RETURN_ADDRESS_POINTER_REGNUM
> +#endif
> +	       || reg_number == ARG_POINTER_REGNUM)
> +	   && eliminable_regno_p (reg_number))
> +    error ("register for operand %i is an internal GCC "
> +	   "implementation detail", operand_num);
> +  else
> +    return true;
> +  return false;
> +}
> +
>   /* Parse the output constraint pointed to by *CONSTRAINT_P.  It is the
>      OPERAND_NUMth output operand, indexed from zero.  There are NINPUTS
>      inputs and NOUTPUTS outputs to this extended-asm.  Upon return,
> @@ -190,7 +262,9 @@ expand_label (tree label)
>   bool
>   parse_output_constraint (const char **constraint_p, int operand_num,
>   			 int ninputs, int noutputs, bool *allows_mem,
> -			 bool *allows_reg, bool *is_inout)
> +			 bool *allows_reg, bool *is_inout,
> +			 const std::pair <vec <HARD_REG_SET> *, machine_mode>
> +			 *hardreg_props)
>   {
>     const char *constraint = *constraint_p;
>     const char *p;
> @@ -244,6 +318,8 @@ parse_output_constraint (const char **constraint_p, int operand_num,
>         constraint = *constraint_p;
>       }
>   
> +  unsigned int alternative = 2;
> +
>     /* Loop through the constraint string.  */
>     for (p = constraint + 1; *p; )
>       {
> @@ -268,7 +344,11 @@ parse_output_constraint (const char **constraint_p, int operand_num,
>   	case 'E':  case 'F':  case 'G':  case 'H':
>   	case 's':  case 'i':  case 'n':
>   	case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
> -	case 'N':  case 'O':  case 'P':  case ',':
> +	case 'N':  case 'O':  case 'P':
> +	  break;
> +
> +	case ',':
> +	  ++alternative;
>   	  break;
>   
>   	case '0':  case '1':  case '2':  case '3':  case '4':
> @@ -289,6 +369,36 @@ parse_output_constraint (const char **constraint_p, int operand_num,
>   	  *allows_mem = true;
>   	  break;
>   
> +	case '{':
> +	  {
> +	    int regno = decode_hreg_constraint (p);
> +	    if (regno < 0)
> +	      {
> +		error ("invalid output constraint: %s", p);
> +		return false;
> +	      }
> +	    if (hardreg_props)
> +	      {
> +		vec<HARD_REG_SET> *hardregs = hardreg_props->first;
> +		if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
> +		    || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
> +		  {
> +		    error ("multiple outputs to hard register: %s",
> +			   reg_names[regno]);
> +		    return false;
> +		  }
> +		else
> +		  {
> +		    SET_HARD_REG_BIT ((*hardregs)[1], regno);
> +		    SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
> +		  }
> +		if (!hardreg_ok_p (regno, hardreg_props->second, operand_num))
> +		  return false;
> +	      }
> +	    *allows_reg = true;
> +	    break;
> +	  }
> +
>   	default:
>   	  if (!ISALPHA (*p))
>   	    break;
> @@ -317,7 +427,9 @@ bool
>   parse_input_constraint (const char **constraint_p, int input_num,
>   			int ninputs, int noutputs, int ninout,
>   			const char * const * constraints,
> -			bool *allows_mem, bool *allows_reg)
> +			bool *allows_mem, bool *allows_reg,
> +			const std::pair <vec<HARD_REG_SET> *, machine_mode>
> +			*hardreg_props)
>   {
>     const char *constraint = *constraint_p;
>     const char *orig_constraint = constraint;
> @@ -332,6 +444,8 @@ parse_input_constraint (const char **constraint_p, int input_num,
>   
>     /* Make sure constraint has neither `=', `+', nor '&'.  */
>   
> +  unsigned int alternative = 2;
> +
>     for (j = 0; j < c_len; j += CONSTRAINT_LEN (constraint[j], constraint+j))
>       switch (constraint[j])
>         {
> @@ -358,7 +472,11 @@ parse_input_constraint (const char **constraint_p, int input_num,
>         case 'E':  case 'F':  case 'G':  case 'H':
>         case 's':  case 'i':  case 'n':
>         case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
> -      case 'N':  case 'O':  case 'P':  case ',':
> +      case 'N':  case 'O':  case 'P':
> +	break;
> +
> +      case ',':
> +	++alternative;
>   	break;
>   
>   	/* Whether or not a numeric constraint allows a register is
> @@ -408,6 +526,35 @@ parse_input_constraint (const char **constraint_p, int input_num,
>   	*allows_mem = true;
>   	break;
>   
> +      case '{':
> +	{
> +	  int regno = decode_hreg_constraint (constraint + j);
> +	  if (regno < 0)
> +	    {
> +	      error ("invalid input constraint: %s", constraint + j);
> +	      return false;
> +	    }
> +	  if (hardreg_props)
> +	    {
> +	      vec <HARD_REG_SET> *hardregs = hardreg_props->first;
> +	      if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
> +		  || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
> +		{
> +		  error ("multiple inputs to hard register: %s",
> +			    reg_names[regno]);
> +		}
> +	      else
> +		{
> +		  SET_HARD_REG_BIT ((*hardregs)[1], regno);
> +		  SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
> +		}
> +	      if (!hardreg_ok_p (regno, hardreg_props->second, input_num))
> +		return false;
> +	    }
> +	  *allows_reg = true;
> +	  break;
> +	}
> +
>         default:
>   	if (! ISALPHA (constraint[j]))
>   	  {
> diff --git a/gcc/stmt.h b/gcc/stmt.h
> index a2caae7121b..a380ecd8cbf 100644
> --- a/gcc/stmt.h
> +++ b/gcc/stmt.h
> @@ -20,11 +20,19 @@ along with GCC; see the file COPYING3.  If not see
>   #ifndef GCC_STMT_H
>   #define GCC_STMT_H
>   
> +#include "target.h"
> +#include "hard-reg-set.h"
> +
>   extern void expand_label (tree);
>   extern bool parse_output_constraint (const char **, int, int, int,
> -				     bool *, bool *, bool *);
> +				     bool *, bool *, bool *,
> +				     const std::pair <vec <HARD_REG_SET> *,
> +						      machine_mode> * = nullptr);
>   extern bool parse_input_constraint (const char **, int, int, int, int,
> -				    const char * const *, bool *, bool *);
> +				    const char * const *, bool *, bool *,
> +				    const std::pair <vec <HARD_REG_SET> *,
> +						     machine_mode> * = nullptr);
> +extern int decode_hreg_constraint (const char *);
>   extern tree resolve_asm_operand_names (tree, tree, tree, tree);
>   #ifdef HARD_CONST
>   /* Silly ifdef to avoid having all includers depend on hard-reg-set.h.  */
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> new file mode 100644
> index 00000000000..6a5a9ada45f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
> @@ -0,0 +1,85 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +
> +#if defined (__aarch64__)
> +# define GPR "{x4}"
> +/* { dg-final { scan-assembler-times "foo\tx4" 8 { target { aarch64*-*-* } } } } */
> +#elif defined (__arm__)
> +# define GPR "{r4}"
> +/* { dg-final { scan-assembler-times "foo\tr4" 8 { target { arm*-*-* } } } } */
> +#elif defined (__i386__)
> +# define GPR "{ecx}"
> +/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { i?86-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { i?86-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%ecx" 4 { target { i?86-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR "{r5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 8 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define GPR "{t5}"
> +/* { dg-final { scan-assembler-times "foo\tt5" 8 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +/* { dg-final { scan-assembler-times "foo\t%r4" 8 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define GPR "{rcx}"
> +/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%ecx" 2 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
> +#endif
> +
> +char
> +test_char (char x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +char
> +test_char_from_mem (char *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> +
> +short
> +test_short (short x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +short
> +test_short_from_mem (short *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> +
> +int
> +test_int (int x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +int
> +test_int_from_mem (int *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> +
> +long
> +test_long (long x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +long
> +test_long_from_mem (long *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> new file mode 100644
> index 00000000000..7dabf9657cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-std=c99" } we need long long */
> +
> +#if defined (__aarch64__)
> +# define GPR "{x4}"
> +/* { dg-final { scan-assembler-times "foo\tx4" 2 { target { aarch64*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR "{r5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
> +#elif defined (__riscv)
> +# define GPR "{t5}"
> +/* { dg-final { scan-assembler-times "foo\tt5" 2 { target { riscv64-*-* } } } } */
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +/* { dg-final { scan-assembler-times "foo\t%r4" 2 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define GPR "{rcx}"
> +/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
> +#endif
> +
> +long long
> +test_longlong (long long x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (x));
> +  return x;
> +}
> +
> +long long
> +test_longlong_from_mem (long long *x)
> +{
> +  __asm__ ("foo\t%0" : "+"GPR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> new file mode 100644
> index 00000000000..fa4472ae8a8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target { { aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } && int128 } } } */
> +/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
> +
> +#if defined (__aarch64__)
> +# define REG "{x4}"
> +/* { dg-final { scan-assembler-times "foo\tx4" 1 { target { aarch64*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define REG "{r5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 1 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define REG "{t5}"
> +/* { dg-final { scan-assembler-times "foo\tt5" 1 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define REG "{r4}"
> +/* { dg-final { scan-assembler-times "foo\t%r4" 1 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define REG "{xmm0}"
> +/* { dg-final { scan-assembler-times "foo\t%xmm0" 1 { target { x86_64-*-* } } } } */
> +#endif
> +
> +void
> +test (void)
> +{
> +  __asm__ ("foo\t%0" :: REG ((__int128) 42));
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-4.c b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> new file mode 100644
> index 00000000000..0816df8f719
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +
> +#if defined (__aarch64__)
> +# define FPR "{d5}"
> +/* { dg-final { scan-assembler-times "foo\tv5" 4 { target { aarch64*-*-* } } } } */
> +#elif defined (__arm__)
> +# define FPR "{d5}"
> +/* { dg-additional-options "-march=armv7-a+fp -mfloat-abi=hard" { target arm*-*-* } } */
> +/* { dg-final { scan-assembler-times "foo\ts10" 4 { target { arm*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define FPR "{5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 4 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define FPR "{f5}"
> +/* { dg-final { scan-assembler-times "foo\tf5" 4 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define FPR "{f5}"
> +/* { dg-final { scan-assembler-times "foo\t%f5" 4 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define FPR "{xmm5}"
> +/* { dg-final { scan-assembler-times "foo\t%xmm5" 4 { target { x86_64-*-* } } } } */
> +#endif
> +
> +float
> +test_float (float x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (x));
> +  return x;
> +}
> +
> +float
> +test_float_from_mem (float *x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (*x));
> +  return *x;
> +}
> +
> +double
> +test_double (double x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (x));
> +  return x;
> +}
> +
> +double
> +test_double_from_mem (double *x)
> +{
> +  __asm__ ("foo\t%0" : "+"FPR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-5.c b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> new file mode 100644
> index 00000000000..a9e25ce1746
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> +
> +typedef int V __attribute__ ((vector_size (4 * sizeof (int))));
> +
> +#if defined (__aarch64__)
> +# define VR "{v20}"
> +/* { dg-final { scan-assembler-times "foo\tv20" 2 { target { aarch64*-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define VR "{v5}"
> +/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
> +#elif defined (__riscv)
> +# define VR "{v5}"
> +/* { dg-additional-options "-march=rv64imv" { target riscv64-*-* } } */
> +/* { dg-final { scan-assembler-times "foo\tv5" 2 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define VR "{v5}"
> +/* { dg-require-effective-target s390_mvx { target s390*-*-* } } */
> +/* { dg-final { scan-assembler-times "foo\t%v5" 2 { target s390*-*-* } } } */
> +#elif defined (__x86_64__)
> +# define VR "{xmm9}"
> +/* { dg-final { scan-assembler-times "foo\t%xmm9" 2 { target { x86_64-*-* } } } } */
> +#endif
> +
> +V
> +test (V x)
> +{
> +  __asm__ ("foo\t%0" : "+"VR (x));
> +  return x;
> +}
> +
> +V
> +test_from_mem (V *x)
> +{
> +  __asm__ ("foo\t%0" : "+"VR (*x));
> +  return *x;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-6.c b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> new file mode 100644
> index 00000000000..d9b7fae8097
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-O2" } */
> +
> +/* Test multiple alternatives.  */
> +
> +#if defined (__aarch64__)
> +# define GPR1 "{x1}"
> +# define GPR2 "{x2}"
> +# define GPR3 "{x3}"
> +/* { dg-final { scan-assembler-times "foo\tx1,x3" 1 { target { aarch64*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\tx2,\\\[x1\\\]" 1 { target { aarch64*-*-* } } } } */
> +#elif defined (__arm__)
> +# define GPR1 "{r1}"
> +# define GPR2 "{r2}"
> +# define GPR3 "{r3}"
> +/* { dg-final { scan-assembler-times "foo\tr1,r3" 1 { target { arm*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\tr2,\\\[r1\\\]" 1 { target { arm*-*-* } } } } */
> +#elif defined (__i386__)
> +# define GPR1 "{eax}"
> +# define GPR2 "{ebx}"
> +# define GPR3 "{ecx}"
> +/* { dg-final { scan-assembler-times "foo\t4\\(%esp\\),%ecx" 1 { target { i?86-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%eax\\)" 1 { target { i?86-*-* } } } } */
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR1 "{r4}"
> +# define GPR2 "{r5}"
> +# define GPR3 "{r6}"
> +/* { dg-final { scan-assembler-times "foo\t4,6" 1 { target { powerpc*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t5,0\\(4\\)" 1 { target { powerpc*-*-* } } } } */
> +#elif defined (__riscv)
> +# define GPR1 "{t1}"
> +# define GPR2 "{t2}"
> +# define GPR3 "{t3}"
> +/* { dg-final { scan-assembler-times "foo\tt1,t3" 1 { target { riscv*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\tt2,0\\(a1\\)" 1 { target { riscv*-*-* } } } } */
> +#elif defined (__s390__)
> +# define GPR1 "{r0}"
> +# define GPR2 "{r1}"
> +# define GPR3 "{r2}"
> +/* { dg-final { scan-assembler-times "foo\t%r0,%r2" 1 { target { s390*-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t%r1,0\\(%r3\\)" 1 { target { s390*-*-* } } } } */
> +#elif defined (__x86_64__)
> +# define GPR1 "{eax}"
> +# define GPR2 "{ebx}"
> +# define GPR3 "{rcx}"
> +/* { dg-final { scan-assembler-times "foo\t%eax,%rcx" 1 { target { x86_64-*-* } } } } */
> +/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%rsi\\)" 1 { target { x86_64-*-* } } } } */
> +#endif
> +
> +void
> +test_reg_reg (int x, long long *y)
> +{
> +  __asm__ ("foo\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (y));
> +}
> +
> +void
> +test_reg_mem (int x, long long *y)
> +{
> +  __asm__ ("bar\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (*y));
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-7.c b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> new file mode 100644
> index 00000000000..39c4497ecaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
> @@ -0,0 +1,70 @@
> +/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-O2" } */
> +
> +/* Pass parameter x in the first general argument register to the assembler
> +   instruction.
> +
> +   In function bar we fail to do so because after the function call to foo,
> +   variable argreg1 does not contain the value of x but rather 42 which got
> +   passed to foo.  Thus, the function always returns 42.  In contrast in
> +   function baz, variable x is saved over the function call and materializes in
> +   the asm statement and therefore is returned.  */
> +
> +#if defined (__aarch64__)
> +# define REG register int argreg1 __asm__ ("x0") = x;
> +# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define REG register int argreg1 __asm__ ("r3") = x;
> +# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
> +#elif defined (__riscv)
> +# define REG register int argreg1 __asm__ ("a0") = x;
> +# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
> +#elif defined (__s390__)
> +# define REG register int argreg1 __asm__ ("r2") = x;
> +# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
> +#elif defined (__x86_64__)
> +# define REG register int argreg1 __asm__ ("edi") = x;
> +# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
> +# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
> +#endif
> +
> +__attribute__ ((noipa))
> +int foo (int unused) { }
> +
> +int
> +bar (int x)
> +{
> +  int out;
> +  REG
> +  foo (42);
> +  MOVE1
> +  return out;
> +}
> +
> +int
> +baz (int x)
> +{
> +  int out;
> +  foo (42);
> +  MOVE2
> +  return out;
> +}
> +
> +int
> +main (void)
> +{
> +  if (bar (0) != 42
> +      || bar (1) != 42
> +      || bar (2) != 42
> +      || bar (32) != 42
> +      || baz (0) != 0
> +      || baz (1) != 1
> +      || baz (2) != 2
> +      || baz (32) != 32)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> new file mode 100644
> index 00000000000..6060c0946da
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
> @@ -0,0 +1,67 @@
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
> +
> +#if defined (__aarch64__)
> +# define GPR1_RAW "x0"
> +# define GPR2 "{x1}"
> +# define GPR3 "{x2}"
> +# define INVALID_GPR_A "{x31}"
> +#elif defined (__arm__)
> +# define GPR1_RAW "r0"
> +# define GPR2 "{r1}"
> +# define GPR3 "{r2}"
> +# define INVALID_GPR_A "{r16}"
> +#elif defined (__i386__)
> +# define GPR1_RAW "%eax"
> +# define GPR2 "{%ebx}"
> +# define GPR3 "{%edx}"
> +# define INVALID_GPR_A "{%eex}"
> +#elif defined (__powerpc__) || defined (__POWERPC__)
> +# define GPR1_RAW "r4"
> +# define GPR2 "{r5}"
> +# define GPR3 "{r6}"
> +# define INVALID_GPR_A "{r33}"
> +#elif defined (__riscv)
> +# define GPR1_RAW "t4"
> +# define GPR2 "{t5}"
> +# define GPR3 "{t6}"
> +# define INVALID_GPR_A "{t7}"
> +#elif defined (__s390__)
> +# define GPR1_RAW "r4"
> +# define GPR2 "{r5}"
> +# define GPR3 "{r6}"
> +# define INVALID_GPR_A "{r17}"
> +#elif defined (__x86_64__)
> +# define GPR1_RAW "rax"
> +# define GPR2 "{rbx}"
> +# define GPR3 "{rcx}"
> +# define INVALID_GPR_A "{rex}"
> +#endif
> +
> +#define GPR1 "{"GPR1_RAW"}"
> +#define INVALID_GPR_B "{"GPR1_RAW
> +
> +struct { int a[128]; } s = {0};
> +
> +void
> +test (void)
> +{
> +  int x, y;
> +  register int gpr1 __asm__ (GPR1_RAW) = 0;
> +
> +  __asm__ ("" :: "{}" (42)); /* { dg-error "invalid input constraint: \{\}" } */
> +  __asm__ ("" :: INVALID_GPR_A (42)); /* { dg-error "invalid input constraint" } */
> +  __asm__ ("" :: INVALID_GPR_B (42)); /* { dg-error "invalid input constraint" } */
> +
> +  __asm__ ("" :: GPR1 (s)); /* { dg-error "data type isn't suitable for register .* of operand 0" } */
> +
> +  __asm__ ("" :: "r" (gpr1), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1 (42), "r" (gpr1)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1 (42), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1","GPR2 (42), GPR2","GPR3 (42));
> +  __asm__ ("" :: GPR1","GPR2 (42), GPR3","GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1","GPR2 (42), GPR1","GPR3 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" :: GPR1 GPR2 (42), GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" : "+"GPR1 (x), "="GPR1 (y)); /* { dg-error "multiple outputs to hard register" } */
> +  __asm__ ("" : "="GPR1 (y) : GPR1 (42), "0" (42)); /* { dg-error "multiple inputs to hard register" } */
> +  __asm__ ("" : "+"GPR1 (x) : GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> new file mode 100644
> index 00000000000..efa843e0800
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target { { aarch64*-*-* s390x-*-* } && int128 } } } */
> +/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
> +
> +/* Test register pairs.  */
> +
> +#if defined (__aarch64__)
> +# define GPR "{x4}"
> +# define INVALID_GPR "{x5}"
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +# define INVALID_GPR "{r5}"
> +#endif
> +
> +void
> +test (void)
> +{
> +  __asm__ ("" :: GPR ((__int128) 42));
> +  __asm__ ("" :: INVALID_GPR ((__int128) 42)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> new file mode 100644
> index 00000000000..ef8af5a6d52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile { target arm*-*-* s390-*-* } } */
> +/* { dg-options "-std=c99" } we need long long */
> +/* { dg-additional-options "-march=armv8-a" { target arm*-*-* } } */
> +
> +/* Test register pairs.  */
> +
> +#if defined (__arm__)
> +# define GPR "{r4}"
> +# define INVALID_GPR "{r5}"
> +#elif defined (__s390__)
> +# define GPR "{r4}"
> +# define INVALID_GPR "{r5}"
> +#endif
> +
> +void
> +test (void)
> +{
> +  __asm__ ("" :: GPR (42ll));
> +  __asm__ ("" :: INVALID_GPR (42ll)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
> +}
> diff --git a/gcc/testsuite/gcc.dg/pr87600-2.c b/gcc/testsuite/gcc.dg/pr87600-2.c
> index e8a9f194b73..860d3f965ef 100644
> --- a/gcc/testsuite/gcc.dg/pr87600-2.c
> +++ b/gcc/testsuite/gcc.dg/pr87600-2.c
> @@ -11,34 +11,6 @@ test0 (void)
>   {
>     register long var1 asm (REG1);
>     register long var2 asm (REG1);
> -  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "invalid hard register usage between output operands" } */
> +  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "multiple outputs to hard register" } */
>     return var1;
>   }
> -
> -long
> -test1 (void)
> -{
> -  register long var1 asm (REG1);
> -  register long var2 asm (REG2);
> -  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
> -  return var1;
> -}
> -
> -long
> -test2 (void)
> -{
> -  register long var1 asm (REG1);
> -  register long var2 asm (REG1);
> -  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> -  return var1;
> -}
> -
> -long
> -test3 (void)
> -{
> -  register long var1 asm (REG1);
> -  register long var2 asm (REG1);
> -  long var3;
> -  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> -  return var1 + var3;
> -}
> diff --git a/gcc/testsuite/gcc.dg/pr87600-3.c b/gcc/testsuite/gcc.dg/pr87600-3.c
> new file mode 100644
> index 00000000000..2673d004130
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr87600-3.c
> @@ -0,0 +1,35 @@
> +/* PR rtl-optimization/87600  */
> +/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* s390*-*-* x86_64-*-* } } */
> +/* { dg-options "-O2" } */
> +
> +#include "pr87600.h"
> +
> +/* The following are all invalid uses of local register variables.  */
> +
> +long
> +test1 (void)
> +{
> +  register long var1 asm (REG1);
> +  register long var2 asm (REG2);
> +  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
> +  return var1;
> +}
> +
> +long
> +test2 (void)
> +{
> +  register long var1 asm (REG1);
> +  register long var2 asm (REG1);
> +  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> +  return var1;
> +}
> +
> +long
> +test3 (void)
> +{
> +  register long var1 asm (REG1);
> +  register long var2 asm (REG1);
> +  long var3;
> +  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
> +  return var1 + var3;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> new file mode 100644
> index 00000000000..671c0ede6ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
> @@ -0,0 +1,103 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** test_in_1:
> +**     foo	%r2
> +**     br	%r14
> +*/
> +
> +int
> +test_in_1 (int x)
> +{
> +  asm ("foo	%0" :: "{r2}" (x));
> +  return x;
> +}
> +
> +/*
> +** test_in_2:
> +**     lgr	(%r[0-9]+),%r2
> +**     lr	%r2,%r3
> +**     foo	%r2
> +**     lgr	%r2,\1
> +**     br	%r14
> +*/
> +
> +int
> +test_in_2 (int x, int y)
> +{
> +  asm ("foo	%0" :: "{r2}" (y));
> +  return x;
> +}
> +
> +/*
> +** test_in_3:
> +**     stmg	%r12,%r15,96\(%r15\)
> +**     lay	%r15,-160\(%r15\)
> +**     lgr	(%r[0-9]+),%r2
> +**     ahi	%r2,1
> +**     lgfr	%r2,%r2
> +**     brasl	%r14,foo@PLT
> +**     lr	%r3,%r2
> +**     lr	%r2,\1
> +**     foo	%r3,%r2
> +**     lgr	%r2,\1
> +**     lmg	%r12,%r15,256\(%r15\)
> +**     br	%r14
> +*/
> +
> +extern int foo (int);
> +
> +int
> +test_in_3 (int x)
> +{
> +  asm ("foo	%0,%1\n" :: "{r3}" (foo (x + 1)), "{r2}" (x));
> +  return x;
> +}
> +
> +/*
> +** test_out_1:
> +**     foo	%r3
> +**     lgfr	%r2,%r3
> +**     br	%r14
> +*/
> +
> +int
> +test_out_1 (void)
> +{
> +  int x;
> +  asm ("foo	%0" : "={r3}" (x));
> +  return x;
> +}
> +
> +/*
> +** test_out_2:
> +**     lgr	(%r[0-9]+),%r2
> +**     foo	%r2
> +**     ark	(%r[0-9]+),\1,%r2
> +**     lgfr	%r2,\2
> +**     br	%r14
> +*/
> +
> +int
> +test_out_2 (int x)
> +{
> +  int y;
> +  asm ("foo	%0" : "={r2}" (y));
> +  return x + y;
> +}
> +
> +/*
> +** test_inout_1:
> +**     foo	%r2
> +**     lgfr	%r2,%r2
> +**     br	%r14
> +*/
> +
> +int
> +test_inout_1 (int x)
> +{
> +  asm ("foo	%0" : "+{r2}" (x));
> +  return x;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> new file mode 100644
> index 00000000000..a892fe8f0aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { lp64 } } } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1078523331\n} } } */
> +
> +
> +/*
> +** test_float_into_gpr:
> +**     lrl	%r4,.LC0
> +**     foo	%r4
> +**     br	%r14
> +*/
> +
> +void
> +test_float_into_gpr (void)
> +{
> +  // This is the counterpart to
> +  //   register float x asm ("r4") = 3.14f;
> +  //   asm ("foo	%0" :: "r" (x));
> +  // where the bit-pattern of 3.14f is loaded into GPR.
> +  asm ("foo	%0" :: "{r4}" (3.14f));
> +}
> +
> +/*
> +** test_float:
> +** (
> +**     ldr	%f4,%f0
> +**     ldr	%f5,%f2
> +** |
> +**     ldr	%f5,%f2
> +**     ldr	%f4,%f0
> +** )
> +**     aebr	%f5,%f4
> +**     ldr	%f0,%f5
> +**     br	%r14
> +*/
> +
> +float
> +test_float (float x, float y)
> +{
> +  asm ("aebr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
> +  return y;
> +}
> diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> new file mode 100644
> index 00000000000..5df37b5b717
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-O2 -march=z13 -mzarch" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1074339512\n\t\.long\t1374389535\n} } } */
> +
> +/*
> +** test_double_into_gpr:
> +**     lgrl	%r4,.LC0
> +**     foo	%r4
> +**     br	%r14
> +*/
> +
> +void
> +test_double_into_gpr (void)
> +{
> +  // This is the counterpart to
> +  //   register double x asm ("r4") = 3.14;
> +  //   asm ("foo	%0" :: "r" (x));
> +  // where the bit-pattern of 3.14 is loaded into GPR.
> +  asm ("foo	%0" :: "{r4}" (3.14));
> +}
> +
> +/*
> +** test_double:
> +** (
> +**     ldr	%f4,%f0
> +**     ldr	%f5,%f2
> +** |
> +**     ldr	%f5,%f2
> +**     ldr	%f4,%f0
> +** )
> +**     adbr	%f5,%f4
> +**     ldr	%f0,%f5
> +**     br	%r14
> +*/
> +
> +double
> +test_double (double x, double y)
> +{
> +  asm ("adbr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
> +  return y;
> +}
> diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
> index 42c719c512c..3c28ccec955 100644
> --- a/gcc/testsuite/lib/scanasm.exp
> +++ b/gcc/testsuite/lib/scanasm.exp
> @@ -896,6 +896,10 @@ proc configure_check-function-bodies { config } {
>   	set up_config(fluff) {^\s*(?://)}
>       } elseif { [istarget *-*-darwin*] } {
>   	set up_config(fluff) {^\s*(?:\.|//|@)|^L[0-9ABCESV]}
> +    } elseif { [istarget s390*-*-*] } {
> +	# In addition to the defaults, skip lines beginning with a # resulting
> +	# from inline asm.
> +	set up_config(fluff) {^\s*(?:\.|//|@|$|#)}
>       } else {
>   	# Skip lines beginning with labels ('.L[...]:') or other directives
>   	# ('.align', '.cfi_startproc', '.quad [...]', '.text', etc.), '//' or
diff mbox series

Patch

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index dad3ae1b7c6..8bdc530626c 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -2966,44 +2966,6 @@  expand_asm_loc (tree string, int vol, location_t locus)
   emit_insn (body);
 }
 
-/* Return the number of times character C occurs in string S.  */
-static int
-n_occurrences (int c, const char *s)
-{
-  int n = 0;
-  while (*s)
-    n += (*s++ == c);
-  return n;
-}
-
-/* A subroutine of expand_asm_operands.  Check that all operands have
-   the same number of alternatives.  Return true if so.  */
-
-static bool
-check_operand_nalternatives (const vec<const char *> &constraints)
-{
-  unsigned len = constraints.length();
-  if (len > 0)
-    {
-      int nalternatives = n_occurrences (',', constraints[0]);
-
-      if (nalternatives + 1 > MAX_RECOG_ALTERNATIVES)
-	{
-	  error ("too many alternatives in %<asm%>");
-	  return false;
-	}
-
-      for (unsigned i = 1; i < len; ++i)
-	if (n_occurrences (',', constraints[i]) != nalternatives)
-	  {
-	    error ("operand constraints for %<asm%> differ "
-		   "in number of alternatives");
-	    return false;
-	  }
-    }
-  return true;
-}
-
 /* Check for overlap between registers marked in CLOBBERED_REGS and
    anything inappropriate in T.  Emit error and return the register
    variable definition for error, NULL_TREE for ok.  */
@@ -3169,10 +3131,6 @@  expand_asm_stmt (gasm *stmt)
 	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
     }
 
-  /* ??? Diagnose during gimplification?  */
-  if (! check_operand_nalternatives (constraints))
-    return;
-
   /* Count the number of meaningful clobbered registers, ignoring what
      we would ignore later.  */
   auto_vec<rtx> clobber_rvec;
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index efd81766bb5..c1efb043579 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -1219,6 +1219,18 @@  mdep_constraint_len (const char *s, file_location loc, int opno)
       if (!strncmp (s, p->name, p->namelen))
 	return p->namelen;
 
+  if (*s == '{')
+    {
+      const char *end = s + 1;
+      while (*end != '}' && *end != '"' && *end != '\0')
+	++end;
+      /* As in parse_constraint_regname (), treat any hard register name
+	 longer than a few characters as an error.  */
+      ptrdiff_t len = end - s;
+      if (*end == '}' && len > 1 && len < 31)
+	return len + 1;
+    }
+
   error_at (loc, "error: undefined machine-specific constraint "
 	    "at this point: \"%s\"", s);
   message_at (loc, "note:  in operand %d", opno);
diff --git a/gcc/genpreds.cc b/gcc/genpreds.cc
index 55d149e8a40..0777cb7a4db 100644
--- a/gcc/genpreds.cc
+++ b/gcc/genpreds.cc
@@ -1148,7 +1148,7 @@  write_insn_constraint_len (void)
   unsigned int i;
 
   puts ("static inline size_t\n"
-	"insn_constraint_len (char fc, const char *str ATTRIBUTE_UNUSED)\n"
+	"insn_constraint_len (char fc, const char *str)\n"
 	"{\n"
 	"  switch (fc)\n"
 	"    {");
@@ -1181,6 +1181,8 @@  write_insn_constraint_len (void)
 
   puts ("    default: break;\n"
 	"    }\n"
+	"  if (str[0] == '{')\n"
+	"      return ((const char *) rawmemchr (str + 1, '}') - str) + 1;\n"
 	"  return 1;\n"
 	"}\n");
 }
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index ab323d764e8..abb705d49ac 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -70,6 +70,9 @@  along with GCC; see the file COPYING3.  If not see
 #include "omp-offload.h"
 #include "context.h"
 #include "tree-nested.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "output.h"
 
 /* Identifier for a basic condition, mapping it to other basic conditions of
    its Boolean expression.  Basic conditions given the same uid (in the same
@@ -6993,6 +6996,42 @@  gimplify_addr_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
   return ret;
 }
 
+/* Return the number of times character C occurs in string S.  */
+
+static int
+num_occurrences (int c, const char *s)
+{
+  int n = 0;
+  while (*s)
+    n += (*s++ == c);
+  return n;
+}
+
+/* A subroutine of gimplify_asm_expr.  Check that all operands have
+   the same number of alternatives.  Return -1 if this is violated.  Otherwise
+   return the number of alternatives.  */
+
+static int
+num_alternatives (const_tree link)
+{
+  if (link == nullptr)
+    return 0;
+
+  const char *constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+  int num = num_occurrences (',', constraint);
+
+  if (num + 1 > MAX_RECOG_ALTERNATIVES)
+    return -1;
+
+  for (link = TREE_CHAIN (link); link; link = TREE_CHAIN (link))
+    {
+      constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+      if (num_occurrences (',', constraint) != num)
+	return -1;
+    }
+  return num + 1;
+}
+
 /* Gimplify the operands of an ASM_EXPR.  Input operands should be a gimple
    value; output operands should be a gimple lvalue.  */
 
@@ -7023,6 +7062,36 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
   clobbers = NULL;
   labels = NULL;
 
+  int num_alternatives_out = num_alternatives (ASM_OUTPUTS (expr));
+  int num_alternatives_in = num_alternatives (ASM_INPUTS (expr));
+  if (num_alternatives_out == -1 || num_alternatives_in == -1
+      || (num_alternatives_out > 0 && num_alternatives_in > 0
+	  && num_alternatives_out != num_alternatives_in))
+    {
+      error ("operand constraints for %<asm%> differ "
+	     "in number of alternatives");
+      return GS_ERROR;
+    }
+  int num_alternatives = MAX (num_alternatives_out, num_alternatives_in);
+
+  /* Regarding hard register constraints ensure that each hard register is used
+     at most once over all inputs/outputs and each alternative.  Keep track in
+     hardregs[0] which hard register is used via an asm register over all
+     inputs/outputs.  hardregs[i] for i >= 2 describes which hard registers are
+     used for alternative i-2 over all inputs/outputs.  hardregs[1] is a
+     reduction of all alternatives, i.e., hardregs[1] |= hardregs[i] for i >= 2
+     and describes whether a hard register is used in any alternative.  This is
+     just a shortcut instead of recomputing the union over all alternatives;
+     possibly multiple times.  */
+  auto_vec<HARD_REG_SET> hardregs (num_alternatives + 2);
+  std::pair <vec <HARD_REG_SET> *, machine_mode> hardreg_props = {&hardregs, VOIDmode};
+  for (int i = 0; i < num_alternatives + 2; ++i)
+    {
+      HARD_REG_SET hregset;
+      CLEAR_HARD_REG_SET (hregset);
+      hardregs.quick_push (hregset);
+    }
+
   ret = GS_ALL_DONE;
   link_next = NULL_TREE;
   for (i = 0, link = ASM_OUTPUTS (expr); link; ++i, link = link_next)
@@ -7039,8 +7108,13 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
       if (constraint_len == 0)
         continue;
 
-      ok = parse_output_constraint (&constraint, i, 0, 0,
-				    &allows_mem, &allows_reg, &is_inout);
+      tree outtype = TREE_TYPE (TREE_VALUE (link));
+      auto hardreg_props_p
+	= outtype != error_mark_node
+	? (hardreg_props.second = TYPE_MODE (outtype), &hardreg_props)
+	: nullptr;
+      ok = parse_output_constraint (&constraint, i, 0, 0, &allows_mem,
+				    &allows_reg, &is_inout, hardreg_props_p);
       if (!ok)
 	{
 	  ret = GS_ERROR;
@@ -7049,7 +7123,6 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
 
       /* If we can't make copies, we can only accept memory.
 	 Similarly for VLAs.  */
-      tree outtype = TREE_TYPE (TREE_VALUE (link));
       if (outtype != error_mark_node
 	  && (TREE_ADDRESSABLE (outtype)
 	      || !COMPLETE_TYPE_P (outtype)
@@ -7111,6 +7184,24 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
 	      TREE_VALUE (link) = tem;
 	      tret = GS_OK;
 	    }
+	  if (VAR_P (op) && DECL_HARD_REGISTER (op))
+	    {
+	      tree id = DECL_ASSEMBLER_NAME (op);
+	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
+	      int hardreg = decode_reg_name (asmspec);
+	      if (hardreg >= 0)
+		{
+		  if (TEST_HARD_REG_BIT (hardregs[0], hardreg)
+		      || TEST_HARD_REG_BIT (hardregs[1], hardreg))
+		    {
+		      error ("multiple outputs to hard register: %s",
+			     reg_names[hardreg]);
+		      return GS_ERROR;
+		    }
+		  else
+		    SET_HARD_REG_BIT (hardregs[0], hardreg);
+		}
+	    }
 	}
 
       vec_safe_push (outputs, link);
@@ -7210,16 +7301,29 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
 	}
     }
 
+  for (unsigned int i = 0; i < hardregs.length (); ++i)
+    CLEAR_HARD_REG_SET (hardregs[i]);
+
   link_next = NULL_TREE;
   for (link = ASM_INPUTS (expr); link; ++i, link = link_next)
     {
       link_next = TREE_CHAIN (link);
       constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
-      parse_input_constraint (&constraint, 0, 0, noutputs, 0,
-			      oconstraints, &allows_mem, &allows_reg);
+      tree intype = TREE_TYPE (TREE_VALUE (link));
+      auto hardreg_props_p
+	= intype != error_mark_node
+	? (hardreg_props.second = TYPE_MODE (intype), &hardreg_props)
+	: nullptr;
+      bool ok = parse_input_constraint (&constraint, 0, 0, noutputs, 0,
+					oconstraints, &allows_mem, &allows_reg,
+					hardreg_props_p);
+      if (!ok)
+	{
+	  ret = GS_ERROR;
+	  is_inout = false;
+	}
 
       /* If we can't make copies, we can only accept memory.  */
-      tree intype = TREE_TYPE (TREE_VALUE (link));
       if (intype != error_mark_node
 	  && (TREE_ADDRESSABLE (intype)
 	      || !COMPLETE_TYPE_P (intype)
@@ -7290,6 +7394,24 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
 				is_gimple_asm_val, fb_rvalue);
 	  if (tret == GS_ERROR)
 	    ret = tret;
+	  tree inputv = TREE_VALUE (link);
+	  if (VAR_P (inputv) && DECL_HARD_REGISTER (inputv))
+	    {
+	      tree id = DECL_ASSEMBLER_NAME (inputv);
+	      const char *asmspec = IDENTIFIER_POINTER (id) + 1;
+	      int hardreg = decode_reg_name (asmspec);
+	      if (hardreg >= 0)
+		{
+		  if (TEST_HARD_REG_BIT (hardregs[1], hardreg))
+		    {
+		      error ("multiple inputs to hard register: %s",
+			     reg_names[hardreg]);
+		      return GS_ERROR;
+		    }
+		  else
+		    SET_HARD_REG_BIT (hardregs[0], hardreg);
+		}
+	    }
 	}
 
       TREE_CHAIN (link) = NULL_TREE;
diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 92b343fa99a..632c75ef01c 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -114,6 +114,7 @@ 
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stmt.h"
 #include "predict.h"
 #include "df.h"
 #include "memmodel.h"
@@ -2165,6 +2166,7 @@  process_alt_operands (int only_alternative)
   bool costly_p;
   enum reg_class cl;
   const HARD_REG_SET *cl_filter;
+  HARD_REG_SET hregset;
 
   /* Calculate some data common for all alternatives to speed up the
      function.	*/
@@ -2536,6 +2538,17 @@  process_alt_operands (int only_alternative)
 		  cl_filter = nullptr;
 		  goto reg;
 
+		case '{':
+		    {
+		      int regno = decode_hreg_constraint (p);
+		      gcc_assert (regno >= 0);
+		      cl = REGNO_REG_CLASS (regno);
+		      CLEAR_HARD_REG_SET (hregset);
+		      SET_HARD_REG_BIT (hregset, regno);
+		      cl_filter = &hregset;
+		      goto reg;
+		    }
+
 		default:
 		  cn = lookup_constraint (p);
 		  switch (get_constraint_type (cn))
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 54b317126c2..b604029d5f1 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -25,6 +25,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stmt.h"
 #include "cfghooks.h"
 #include "df.h"
 #include "memmodel.h"
@@ -2333,7 +2334,8 @@  asm_operand_ok (rtx op, const char *constraint, const char **constraints)
 	    {
 	    case CT_REGISTER:
 	      if (!result
-		  && reg_class_for_constraint (cn) != NO_REGS
+		  && (reg_class_for_constraint (cn) != NO_REGS
+		      || constraint[0] == '{')
 		  && GET_MODE (op) != BLKmode
 		  && register_operand (op, VOIDmode))
 		result = 1;
@@ -3267,6 +3269,13 @@  constrain_operands (int strict, alternative_mask alternatives)
 		  win = true;
 		break;
 
+	      case '{':
+		if ((REG_P (op) && HARD_REGISTER_P (op)
+		     && (int) REGNO (op) == decode_hreg_constraint (p))
+		    || !reload_completed)
+		  win = true;
+		break;
+
 	      default:
 		{
 		  enum constraint_num cn = lookup_constraint (p);
diff --git a/gcc/stmt.cc b/gcc/stmt.cc
index ae1527f0a19..7b073f8ce85 100644
--- a/gcc/stmt.cc
+++ b/gcc/stmt.cc
@@ -39,6 +39,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "emit-rtl.h"
 #include "pretty-print.h"
 #include "diagnostic-core.h"
+#include "output.h"
 
 #include "fold-const.h"
 #include "varasm.h"
@@ -174,6 +175,77 @@  expand_label (tree label)
     maybe_set_first_label_num (label_r);
 }
 
+/* Parse a hard register constraint and return its register number, or -1 if
+   BEGIN is not of the form "{regname}" (missing or empty braces).  For the
+   sake of simplicity a register name longer than 31 characters is rejected
+   with -1, too.  Otherwise the result of decode_reg_name is returned.  */
+
+int
+decode_hreg_constraint (const char *begin)
+{
+  if (*begin != '{')
+    return -1;
+  ++begin;
+  const char *end = begin;
+  while (*end != '}' && *end != '\0')
+    ++end;
+  if (*end != '}' || end == begin)
+    return -1;
+  /* REGNAME holds up to 31 characters plus the terminating NUL.  */
+  char regname[32];
+  size_t len = end - begin;
+  if (len >= sizeof (regname))
+    return -1;
+  memcpy (regname, begin, len);
+  regname[len] = '\0';
+  return decode_reg_name (regname);
+}
+
+/* Return true iff REGNUM is the FROM register of an ELIMINABLE_REGS pair.  */
+
+static bool
+eliminable_regno_p (int regnum)
+{
+  static const struct { const int from; const int to; } elim_pairs[]
+    = ELIMINABLE_REGS;
+  const size_t npairs = ARRAY_SIZE (elim_pairs);
+  size_t idx = 0;
+  while (idx < npairs && elim_pairs[idx].from != regnum)
+    ++idx;
+  return idx < npairs;
+}
+
+/* Like make_decl_rtl(): true if REG_NUMBER suits MODE, else emit an error.  */
+
+static bool
+hardreg_ok_p (int reg_number, machine_mode mode, int operand_num)
+{
+  if (mode == BLKmode)
+    error ("data type isn%'t suitable for register %s of operand %i",
+	   reg_names[reg_number], operand_num);
+  else if (!in_hard_reg_set_p (accessible_reg_set, mode, reg_number))
+    error ("register %s for operand %i cannot be accessed"
+	   " by the current target", reg_names[reg_number], operand_num);
+  else if (!in_hard_reg_set_p (operand_reg_set, mode, reg_number))
+    error ("register %s for operand %i is not general enough to be used"
+	   " as a register variable", reg_names[reg_number], operand_num);
+  else if (!targetm.hard_regno_mode_ok (reg_number, mode))
+    error ("register %s for operand %i isn%'t suitable for data type",
+	   reg_names[reg_number], operand_num);
+  else if (reg_number != HARD_FRAME_POINTER_REGNUM
+	   && (reg_number == FRAME_POINTER_REGNUM
+#ifdef RETURN_ADDRESS_POINTER_REGNUM
+	       || reg_number == RETURN_ADDRESS_POINTER_REGNUM
+#endif
+	       || reg_number == ARG_POINTER_REGNUM)
+	   && eliminable_regno_p (reg_number))
+    error ("register for operand %i is an internal GCC "
+	   "implementation detail", operand_num);
+  else
+    return true;
+  return false;
+}
+
 /* Parse the output constraint pointed to by *CONSTRAINT_P.  It is the
    OPERAND_NUMth output operand, indexed from zero.  There are NINPUTS
    inputs and NOUTPUTS outputs to this extended-asm.  Upon return,
@@ -190,7 +262,9 @@  expand_label (tree label)
 bool
 parse_output_constraint (const char **constraint_p, int operand_num,
 			 int ninputs, int noutputs, bool *allows_mem,
-			 bool *allows_reg, bool *is_inout)
+			 bool *allows_reg, bool *is_inout,
+			 const std::pair <vec <HARD_REG_SET> *, machine_mode>
+			 *hardreg_props)
 {
   const char *constraint = *constraint_p;
   const char *p;
@@ -244,6 +318,8 @@  parse_output_constraint (const char **constraint_p, int operand_num,
       constraint = *constraint_p;
     }
 
+  unsigned int alternative = 2;
+
   /* Loop through the constraint string.  */
   for (p = constraint + 1; *p; )
     {
@@ -268,7 +344,11 @@  parse_output_constraint (const char **constraint_p, int operand_num,
 	case 'E':  case 'F':  case 'G':  case 'H':
 	case 's':  case 'i':  case 'n':
 	case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
-	case 'N':  case 'O':  case 'P':  case ',':
+	case 'N':  case 'O':  case 'P':
+	  break;
+
+	case ',':
+	  ++alternative;
 	  break;
 
 	case '0':  case '1':  case '2':  case '3':  case '4':
@@ -289,6 +369,36 @@  parse_output_constraint (const char **constraint_p, int operand_num,
 	  *allows_mem = true;
 	  break;
 
+	case '{':
+	  {
+	    int regno = decode_hreg_constraint (p);
+	    if (regno < 0)
+	      {
+		error ("invalid output constraint: %s", p);
+		return false;
+	      }
+	    if (hardreg_props)
+	      {
+		vec<HARD_REG_SET> *hardregs = hardreg_props->first;
+		if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
+		    || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
+		  {
+		    error ("multiple outputs to hard register: %s",
+			   reg_names[regno]);
+		    return false;
+		  }
+		else
+		  {
+		    SET_HARD_REG_BIT ((*hardregs)[1], regno);
+		    SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
+		  }
+		if (!hardreg_ok_p (regno, hardreg_props->second, operand_num))
+		  return false;
+	      }
+	    *allows_reg = true;
+	    break;
+	  }
+
 	default:
 	  if (!ISALPHA (*p))
 	    break;
@@ -317,7 +427,9 @@  bool
 parse_input_constraint (const char **constraint_p, int input_num,
 			int ninputs, int noutputs, int ninout,
 			const char * const * constraints,
-			bool *allows_mem, bool *allows_reg)
+			bool *allows_mem, bool *allows_reg,
+			const std::pair <vec<HARD_REG_SET> *, machine_mode>
+			*hardreg_props)
 {
   const char *constraint = *constraint_p;
   const char *orig_constraint = constraint;
@@ -332,6 +444,8 @@  parse_input_constraint (const char **constraint_p, int input_num,
 
   /* Make sure constraint has neither `=', `+', nor '&'.  */
 
+  unsigned int alternative = 2;
+
   for (j = 0; j < c_len; j += CONSTRAINT_LEN (constraint[j], constraint+j))
     switch (constraint[j])
       {
@@ -358,7 +472,11 @@  parse_input_constraint (const char **constraint_p, int input_num,
       case 'E':  case 'F':  case 'G':  case 'H':
       case 's':  case 'i':  case 'n':
       case 'I':  case 'J':  case 'K':  case 'L':  case 'M':
-      case 'N':  case 'O':  case 'P':  case ',':
+      case 'N':  case 'O':  case 'P':
+	break;
+
+      case ',':
+	++alternative;
 	break;
 
 	/* Whether or not a numeric constraint allows a register is
@@ -408,6 +526,35 @@  parse_input_constraint (const char **constraint_p, int input_num,
 	*allows_mem = true;
 	break;
 
+      case '{':
+	{
+	  int regno = decode_hreg_constraint (constraint + j);
+	  if (regno < 0)
+	    {
+	      error ("invalid input constraint: %s", constraint + j);
+	      return false;
+	    }
+	  if (hardreg_props)
+	    {
+	      vec <HARD_REG_SET> *hardregs = hardreg_props->first;
+	      if (TEST_HARD_REG_BIT ((*hardregs)[0], regno)
+		  || TEST_HARD_REG_BIT ((*hardregs)[alternative], regno))
+		{
+		  error ("multiple inputs to hard register: %s",
+			    reg_names[regno]);
+		}
+	      else
+		{
+		  SET_HARD_REG_BIT ((*hardregs)[1], regno);
+		  SET_HARD_REG_BIT ((*hardregs)[alternative], regno);
+		}
+	      if (!hardreg_ok_p (regno, hardreg_props->second, input_num))
+		return false;
+	    }
+	  *allows_reg = true;
+	  break;
+	}
+
       default:
 	if (! ISALPHA (constraint[j]))
 	  {
diff --git a/gcc/stmt.h b/gcc/stmt.h
index a2caae7121b..a380ecd8cbf 100644
--- a/gcc/stmt.h
+++ b/gcc/stmt.h
@@ -20,11 +20,19 @@  along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_STMT_H
 #define GCC_STMT_H
 
+#include "target.h"
+#include "hard-reg-set.h"
+
 extern void expand_label (tree);
 extern bool parse_output_constraint (const char **, int, int, int,
-				     bool *, bool *, bool *);
+				     bool *, bool *, bool *,
+				     const std::pair <vec <HARD_REG_SET> *,
+						      machine_mode> * = nullptr);
 extern bool parse_input_constraint (const char **, int, int, int, int,
-				    const char * const *, bool *, bool *);
+				    const char * const *, bool *, bool *,
+				    const std::pair <vec <HARD_REG_SET> *,
+						     machine_mode> * = nullptr);
+extern int decode_hreg_constraint (const char *);
 extern tree resolve_asm_operand_names (tree, tree, tree, tree);
 #ifdef HARD_CONST
 /* Silly ifdef to avoid having all includers depend on hard-reg-set.h.  */
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
new file mode 100644
index 00000000000..6a5a9ada45f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-1.c
@@ -0,0 +1,85 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+
+#if defined (__aarch64__)
+# define GPR "{x4}"
+/* { dg-final { scan-assembler-times "foo\tx4" 8 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\tr4" 8 { target { arm*-*-* } } } } */
+#elif defined (__i386__)
+# define GPR "{ecx}"
+/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { i?86-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { i?86-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%ecx" 4 { target { i?86-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR "{r5}"
+/* { dg-final { scan-assembler-times "foo\t5" 8 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR "{t5}"
+/* { dg-final { scan-assembler-times "foo\tt5" 8 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\t%r4" 8 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR "{rcx}"
+/* { dg-final { scan-assembler-times "foo\t%cl" 2 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%cx" 2 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%ecx" 2 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
+#endif
+
+char
+test_char (char x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+char
+test_char_from_mem (char *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
+
+short
+test_short (short x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+short
+test_short_from_mem (short *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
+
+int
+test_int (int x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+int
+test_int_from_mem (int *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
+
+long
+test_long (long x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+long
+test_long_from_mem (long *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
new file mode 100644
index 00000000000..7dabf9657cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-2.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-std=c99" } we need long long */
+
+#if defined (__aarch64__)
+# define GPR "{x4}"
+/* { dg-final { scan-assembler-times "foo\tx4" 2 { target { aarch64*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR "{r5}"
+/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR "{t5}"
+/* { dg-final { scan-assembler-times "foo\tt5" 2 { target { riscv64-*-* } } } } */
+#elif defined (__s390__)
+# define GPR "{r4}"
+/* { dg-final { scan-assembler-times "foo\t%r4" 2 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR "{rcx}"
+/* { dg-final { scan-assembler-times "foo\t%rcx" 2 { target { x86_64-*-* } } } } */
+#endif
+
+long long
+test_longlong (long long x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (x));
+  return x;
+}
+
+long long
+test_longlong_from_mem (long long *x)
+{
+  __asm__ ("foo\t%0" : "+"GPR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
new file mode 100644
index 00000000000..fa4472ae8a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-3.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile { target { { aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } && int128 } } } */
+/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
+
+#if defined (__aarch64__)
+# define REG "{x4}"
+/* { dg-final { scan-assembler-times "foo\tx4" 1 { target { aarch64*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define REG "{r5}"
+/* { dg-final { scan-assembler-times "foo\t5" 1 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define REG "{t5}"
+/* { dg-final { scan-assembler-times "foo\tt5" 1 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define REG "{r4}"
+/* { dg-final { scan-assembler-times "foo\t%r4" 1 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define REG "{xmm0}"
+/* { dg-final { scan-assembler-times "foo\t%xmm0" 1 { target { x86_64-*-* } } } } */
+#endif
+
+void
+test (void)
+{
+  __asm__ ("foo\t%0" :: REG ((__int128) 42));
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-4.c b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
new file mode 100644
index 00000000000..0816df8f719
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-4.c
@@ -0,0 +1,50 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+
+#if defined (__aarch64__)
+# define FPR "{d5}"
+/* { dg-final { scan-assembler-times "foo\tv5" 4 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define FPR "{d5}"
+/* { dg-additional-options "-march=armv7-a+fp -mfloat-abi=hard" { target arm*-*-* } } */
+/* { dg-final { scan-assembler-times "foo\ts10" 4 { target { arm*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define FPR "{5}"
+/* { dg-final { scan-assembler-times "foo\t5" 4 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define FPR "{f5}"
+/* { dg-final { scan-assembler-times "foo\tf5" 4 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define FPR "{f5}"
+/* { dg-final { scan-assembler-times "foo\t%f5" 4 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define FPR "{xmm5}"
+/* { dg-final { scan-assembler-times "foo\t%xmm5" 4 { target { x86_64-*-* } } } } */
+#endif
+
+float
+test_float (float x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (x));
+  return x;
+}
+
+float
+test_float_from_mem (float *x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (*x));
+  return *x;
+}
+
+double
+test_double (double x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (x));
+  return x;
+}
+
+double
+test_double_from_mem (double *x)
+{
+  __asm__ ("foo\t%0" : "+"FPR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-5.c b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
new file mode 100644
index 00000000000..a9e25ce1746
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-5.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+
+typedef int V __attribute__ ((vector_size (4 * sizeof (int))));
+
+#if defined (__aarch64__)
+# define VR "{v20}"
+/* { dg-final { scan-assembler-times "foo\tv20" 2 { target { aarch64*-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define VR "{v5}"
+/* { dg-final { scan-assembler-times "foo\t5" 2 { target { powerpc64*-*-* } } } } */
+#elif defined (__riscv)
+# define VR "{v5}"
+/* { dg-additional-options "-march=rv64imv" { target riscv64-*-* } } */
+/* { dg-final { scan-assembler-times "foo\tv5" 2 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define VR "{v5}"
+/* { dg-require-effective-target s390_mvx { target s390*-*-* } } */
+/* { dg-final { scan-assembler-times "foo\t%v5" 2 { target s390*-*-* } } } */
+#elif defined (__x86_64__)
+# define VR "{xmm9}"
+/* { dg-final { scan-assembler-times "foo\t%xmm9" 2 { target { x86_64-*-* } } } } */
+#endif
+
+V
+test (V x)
+{
+  __asm__ ("foo\t%0" : "+"VR (x));
+  return x;
+}
+
+V
+test_from_mem (V *x)
+{
+  __asm__ ("foo\t%0" : "+"VR (*x));
+  return *x;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-6.c b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
new file mode 100644
index 00000000000..d9b7fae8097
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-6.c
@@ -0,0 +1,60 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+/* Test multiple alternatives.  */
+
+#if defined (__aarch64__)
+# define GPR1 "{x1}"
+# define GPR2 "{x2}"
+# define GPR3 "{x3}"
+/* { dg-final { scan-assembler-times "foo\tx1,x3" 1 { target { aarch64*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\tx2,\\\[x1\\\]" 1 { target { aarch64*-*-* } } } } */
+#elif defined (__arm__)
+# define GPR1 "{r1}"
+# define GPR2 "{r2}"
+# define GPR3 "{r3}"
+/* { dg-final { scan-assembler-times "foo\tr1,r3" 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\tr2,\\\[r1\\\]" 1 { target { arm*-*-* } } } } */
+#elif defined (__i386__)
+# define GPR1 "{eax}"
+# define GPR2 "{ebx}"
+# define GPR3 "{ecx}"
+/* { dg-final { scan-assembler-times "foo\t4\\(%esp\\),%ecx" 1 { target { i?86-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%eax\\)" 1 { target { i?86-*-* } } } } */
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR1 "{r4}"
+# define GPR2 "{r5}"
+# define GPR3 "{r6}"
+/* { dg-final { scan-assembler-times "foo\t4,6" 1 { target { powerpc*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t5,0\\(4\\)" 1 { target { powerpc*-*-* } } } } */
+#elif defined (__riscv)
+# define GPR1 "{t1}"
+# define GPR2 "{t2}"
+# define GPR3 "{t3}"
+/* { dg-final { scan-assembler-times "foo\tt1,t3" 1 { target { riscv*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\tt2,0\\(a1\\)" 1 { target { riscv*-*-* } } } } */
+#elif defined (__s390__)
+# define GPR1 "{r0}"
+# define GPR2 "{r1}"
+# define GPR3 "{r2}"
+/* { dg-final { scan-assembler-times "foo\t%r0,%r2" 1 { target { s390*-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t%r1,0\\(%r3\\)" 1 { target { s390*-*-* } } } } */
+#elif defined (__x86_64__)
+# define GPR1 "{eax}"
+# define GPR2 "{ebx}"
+# define GPR3 "{rcx}"
+/* { dg-final { scan-assembler-times "foo\t%eax,%rcx" 1 { target { x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "bar\t%ebx,\\(%rsi\\)" 1 { target { x86_64-*-* } } } } */
+#endif
+
+void
+test_reg_reg (int x, long long *y)
+{
+  __asm__ ("foo\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (y));
+}
+
+void
+test_reg_mem (int x, long long *y)
+{
+  __asm__ ("bar\t%0,%1" :: GPR1"m,"GPR2 (x), GPR3",m" (*y));
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-7.c b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
new file mode 100644
index 00000000000..39c4497ecaf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-7.c
@@ -0,0 +1,70 @@ 
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+/* Pass parameter x in the first general argument register to the assembler
+   instruction.
+
+   In function bar we fail to do so because after the function call to foo,
+   variable argreg1 does not contain the value of x but rather 42 which got
+   passed to foo.  Thus, the function always returns 42.  In contrast in
+   function baz, variable x is saved over the function call and materializes in
+   the asm statement and therefore is returned.  */
+
+#if defined (__aarch64__)
+# define REG register int argreg1 __asm__ ("x0") = x;
+# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define REG register int argreg1 __asm__ ("r3") = x;
+# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
+#elif defined (__riscv)
+# define REG register int argreg1 __asm__ ("a0") = x;
+# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
+#elif defined (__s390__)
+# define REG register int argreg1 __asm__ ("r2") = x;
+# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
+#elif defined (__x86_64__)
+# define REG register int argreg1 __asm__ ("edi") = x;
+# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
+#endif
+
+__attribute__ ((noipa))
+void foo (int unused) { }  /* Callers never use a return value.  */
+
+int
+bar (int x)
+{
+  int out;
+  REG
+  foo (42);
+  MOVE1
+  return out;
+}
+
+int
+baz (int x)
+{
+  int out;
+  foo (42);
+  MOVE2
+  return out;
+}
+
+int
+main (void)
+{
+  if (bar (0) != 42
+      || bar (1) != 42
+      || bar (2) != 42
+      || bar (32) != 42
+      || baz (0) != 0
+      || baz (1) != 1
+      || baz (2) != 2
+      || baz (32) != 32)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
new file mode 100644
index 00000000000..6060c0946da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-1.c
@@ -0,0 +1,67 @@ 
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* riscv*-*-* s390*-*-* x86_64-*-* } } */
+
+#if defined (__aarch64__)
+# define GPR1_RAW "x0"
+# define GPR2 "{x1}"
+# define GPR3 "{x2}"
+# define INVALID_GPR_A "{x31}"
+#elif defined (__arm__)
+# define GPR1_RAW "r0"
+# define GPR2 "{r1}"
+# define GPR3 "{r2}"
+# define INVALID_GPR_A "{r16}"
+#elif defined (__i386__)
+# define GPR1_RAW "%eax"
+# define GPR2 "{%ebx}"
+# define GPR3 "{%edx}"
+# define INVALID_GPR_A "{%eex}"
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define GPR1_RAW "r4"
+# define GPR2 "{r5}"
+# define GPR3 "{r6}"
+# define INVALID_GPR_A "{r33}"
+#elif defined (__riscv)
+# define GPR1_RAW "t4"
+# define GPR2 "{t5}"
+# define GPR3 "{t6}"
+# define INVALID_GPR_A "{t7}"
+#elif defined (__s390__)
+# define GPR1_RAW "r4"
+# define GPR2 "{r5}"
+# define GPR3 "{r6}"
+# define INVALID_GPR_A "{r17}"
+#elif defined (__x86_64__)
+# define GPR1_RAW "rax"
+# define GPR2 "{rbx}"
+# define GPR3 "{rcx}"
+# define INVALID_GPR_A "{rex}"
+#endif
+
+#define GPR1 "{"GPR1_RAW"}"
+#define INVALID_GPR_B "{"GPR1_RAW
+
+struct { int a[128]; } s = {0};
+
+void
+test (void)
+{
+  int x, y;
+  register int gpr1 __asm__ (GPR1_RAW) = 0;
+
+  __asm__ ("" :: "{}" (42)); /* { dg-error "invalid input constraint: \{\}" } */
+  __asm__ ("" :: INVALID_GPR_A (42)); /* { dg-error "invalid input constraint" } */
+  __asm__ ("" :: INVALID_GPR_B (42)); /* { dg-error "invalid input constraint" } */
+
+  __asm__ ("" :: GPR1 (s)); /* { dg-error "data type isn't suitable for register .* of operand 0" } */
+
+  __asm__ ("" :: "r" (gpr1), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" :: GPR1 (42), "r" (gpr1)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" :: GPR1 (42), GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" :: GPR1","GPR2 (42), GPR2","GPR3 (42));
+  __asm__ ("" :: GPR1","GPR2 (42), GPR3","GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" :: GPR1","GPR2 (42), GPR1","GPR3 (42)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" :: GPR1 GPR2 (42), GPR2 (42)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "+"GPR1 (x), "="GPR1 (y)); /* { dg-error "multiple outputs to hard register" } */
+  __asm__ ("" : "="GPR1 (y) : GPR1 (42), "0" (42)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "+"GPR1 (x) : GPR1 (42)); /* { dg-error "multiple inputs to hard register" } */
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
new file mode 100644
index 00000000000..efa843e0800
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile { target { { aarch64*-*-* s390x-*-* } && int128 } } } */
+/* { dg-options "-O2" } get rid of -ansi since we use __int128 */
+
+/* Test register pairs.  */
+
+#if defined (__aarch64__)
+# define GPR "{x4}"
+# define INVALID_GPR "{x5}"
+#elif defined (__s390__)
+# define GPR "{r4}"
+# define INVALID_GPR "{r5}"
+#endif
+
+void
+test (void)
+{
+  __asm__ ("" :: GPR ((__int128) 42));
+  __asm__ ("" :: INVALID_GPR ((__int128) 42)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
new file mode 100644
index 00000000000..ef8af5a6d52
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-error-3.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile { target arm*-*-* s390-*-* } } */
+/* { dg-options "-std=c99" } we need long long */
+/* { dg-additional-options "-march=armv8-a" { target arm*-*-* } } */
+
+/* Test register pairs.  */
+
+#if defined (__arm__)
+# define GPR "{r4}"
+# define INVALID_GPR "{r5}"
+#elif defined (__s390__)
+# define GPR "{r4}"
+# define INVALID_GPR "{r5}"
+#endif
+
+void
+test (void)
+{
+  __asm__ ("" :: GPR (42ll));
+  __asm__ ("" :: INVALID_GPR (42ll)); /* { dg-error "register .* for operand 0 isn't suitable for data type" } */
+}
diff --git a/gcc/testsuite/gcc.dg/pr87600-2.c b/gcc/testsuite/gcc.dg/pr87600-2.c
index e8a9f194b73..860d3f965ef 100644
--- a/gcc/testsuite/gcc.dg/pr87600-2.c
+++ b/gcc/testsuite/gcc.dg/pr87600-2.c
@@ -11,34 +11,6 @@  test0 (void)
 {
   register long var1 asm (REG1);
   register long var2 asm (REG1);
-  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "invalid hard register usage between output operands" } */
+  asm ("blah %0 %1" : "=r" (var1), "=r" (var2)); /* { dg-error "multiple outputs to hard register" } */
   return var1;
 }
-
-long
-test1 (void)
-{
-  register long var1 asm (REG1);
-  register long var2 asm (REG2);
-  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
-  return var1;
-}
-
-long
-test2 (void)
-{
-  register long var1 asm (REG1);
-  register long var2 asm (REG1);
-  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
-  return var1;
-}
-
-long
-test3 (void)
-{
-  register long var1 asm (REG1);
-  register long var2 asm (REG1);
-  long var3;
-  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
-  return var1 + var3;
-}
diff --git a/gcc/testsuite/gcc.dg/pr87600-3.c b/gcc/testsuite/gcc.dg/pr87600-3.c
new file mode 100644
index 00000000000..2673d004130
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr87600-3.c
@@ -0,0 +1,35 @@ 
+/* PR rtl-optimization/87600  */
+/* { dg-do compile { target aarch64*-*-* arm*-*-* i?86-*-* powerpc*-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+#include "pr87600.h"
+
+/* The following are all invalid uses of local register variables.  */
+
+long
+test1 (void)
+{
+  register long var1 asm (REG1);
+  register long var2 asm (REG2);
+  asm ("blah %0 %1" : "=r" (var1) : "0" (var2)); /* { dg-error "invalid hard register usage between output operand and matching constraint operand" } */
+  return var1;
+}
+
+long
+test2 (void)
+{
+  register long var1 asm (REG1);
+  register long var2 asm (REG1);
+  asm ("blah %0 %1" : "=&r" (var1) : "r" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
+  return var1;
+}
+
+long
+test3 (void)
+{
+  register long var1 asm (REG1);
+  register long var2 asm (REG1);
+  long var3;
+  asm ("blah %0 %1" : "=&r" (var1), "=r" (var3) : "1" (var2)); /* { dg-error "invalid hard register usage between earlyclobber operand and input operand" } */
+  return var1 + var3;
+}
diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
new file mode 100644
index 00000000000..671c0ede6ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-1.c
@@ -0,0 +1,103 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O2 -march=z13 -mzarch" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** test_in_1:
+**     foo	%r2
+**     br	%r14
+*/
+
+int
+test_in_1 (int x)
+{
+  asm ("foo	%0" :: "{r2}" (x));
+  return x;
+}
+
+/*
+** test_in_2:
+**     lgr	(%r[0-9]+),%r2
+**     lr	%r2,%r3
+**     foo	%r2
+**     lgr	%r2,\1
+**     br	%r14
+*/
+
+int
+test_in_2 (int x, int y)
+{
+  asm ("foo	%0" :: "{r2}" (y));
+  return x;
+}
+
+/*
+** test_in_3:
+**     stmg	%r12,%r15,96\(%r15\)
+**     lay	%r15,-160\(%r15\)
+**     lgr	(%r[0-9]+),%r2
+**     ahi	%r2,1
+**     lgfr	%r2,%r2
+**     brasl	%r14,foo@PLT
+**     lr	%r3,%r2
+**     lr	%r2,\1
+**     foo	%r3,%r2
+**     lgr	%r2,\1
+**     lmg	%r12,%r15,256\(%r15\)
+**     br	%r14
+*/
+
+extern int foo (int);
+
+int
+test_in_3 (int x)
+{
+  asm ("foo	%0,%1\n" :: "{r3}" (foo (x + 1)), "{r2}" (x));
+  return x;
+}
+
+/*
+** test_out_1:
+**     foo	%r3
+**     lgfr	%r2,%r3
+**     br	%r14
+*/
+
+int
+test_out_1 (void)
+{
+  int x;
+  asm ("foo	%0" : "={r3}" (x));
+  return x;
+}
+
+/*
+** test_out_2:
+**     lgr	(%r[0-9]+),%r2
+**     foo	%r2
+**     ark	(%r[0-9]+),\1,%r2
+**     lgfr	%r2,\2
+**     br	%r14
+*/
+
+int
+test_out_2 (int x)
+{
+  int y;
+  asm ("foo	%0" : "={r2}" (y));
+  return x + y;
+}
+
+/*
+** test_inout_1:
+**     foo	%r2
+**     lgfr	%r2,%r2
+**     br	%r14
+*/
+
+int
+test_inout_1 (int x)
+{
+  asm ("foo	%0" : "+{r2}" (x));
+  return x;
+}
diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
new file mode 100644
index 00000000000..a892fe8f0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-2.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O2 -march=z13 -mzarch" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1078523331\n} } } */
+
+
+/*
+** test_float_into_gpr:
+**     lrl	%r4,.LC0
+**     foo	%r4
+**     br	%r14
+*/
+
+void
+test_float_into_gpr (void)
+{
+  // This is the counterpart to
+  //   register float x asm ("r4") = 3.14f;
+  //   asm ("foo	%0" :: "r" (x));
+  // where the bit pattern of 3.14f is loaded into a GPR.
+  asm ("foo	%0" :: "{r4}" (3.14f));
+}
+
+/*
+** test_float:
+** (
+**     ldr	%f4,%f0
+**     ldr	%f5,%f2
+** |
+**     ldr	%f5,%f2
+**     ldr	%f4,%f0
+** )
+**     aebr	%f5,%f4
+**     ldr	%f0,%f5
+**     br	%r14
+*/
+
+float
+test_float (float x, float y)
+{
+  asm ("aebr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
new file mode 100644
index 00000000000..5df37b5b717
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/asm-hard-reg-3.c
@@ -0,0 +1,42 @@ 
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -march=z13 -mzarch" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+/* { dg-final { scan-assembler {\.LC0:\n\t\.long\t1074339512\n\t\.long\t1374389535\n} } } */
+
+/*
+** test_double_into_gpr:
+**     lgrl	%r4,.LC0
+**     foo	%r4
+**     br	%r14
+*/
+
+void
+test_double_into_gpr (void)
+{
+  // This is the counterpart to
+  //   register double x asm ("r4") = 3.14;
+  //   asm ("foo	%0" :: "r" (x));
+  // where the bit pattern of 3.14 is loaded into a GPR.
+  asm ("foo	%0" :: "{r4}" (3.14));
+}
+
+/*
+** test_double:
+** (
+**     ldr	%f4,%f0
+**     ldr	%f5,%f2
+** |
+**     ldr	%f5,%f2
+**     ldr	%f4,%f0
+** )
+**     adbr	%f5,%f4
+**     ldr	%f0,%f5
+**     br	%r14
+*/
+
+double
+test_double (double x, double y)
+{
+  asm ("adbr	%0,%1" : "+{f5}" (y) : "{f4}" (x));
+  return y;
+}
diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
index 42c719c512c..3c28ccec955 100644
--- a/gcc/testsuite/lib/scanasm.exp
+++ b/gcc/testsuite/lib/scanasm.exp
@@ -896,6 +896,10 @@  proc configure_check-function-bodies { config } {
 	set up_config(fluff) {^\s*(?://)}
     } elseif { [istarget *-*-darwin*] } {
 	set up_config(fluff) {^\s*(?:\.|//|@)|^L[0-9ABCESV]}
+    } elseif { [istarget s390*-*-*] } {
+	# In addition to the defaults, skip lines beginning with a '#' resulting
+	# from inline asm.
+	set up_config(fluff) {^\s*(?:\.|//|@|$|#)}
     } else {
 	# Skip lines beginning with labels ('.L[...]:') or other directives
 	# ('.align', '.cfi_startproc', '.quad [...]', '.text', etc.), '//' or