diff mbox series

Expand: Pass down equality only flag to cmpmem expand

Message ID deee9182-fe94-42a3-b53a-6336f6b1bec3@linux.ibm.com
State New
Headers show
Series Expand: Pass down equality only flag to cmpmem expand | expand

Commit Message

HAO CHEN GUI Nov. 28, 2023, 7:43 a.m. UTC
Hi,
  This patch passes down the equality only flags from
emit_block_cmp_hints to cmpmem optab so that the target specific expand
can generate optimized insns for equality only compare. Targets
(e.g. rs6000) can generate more efficient insn sequence if the block
compare is equality only.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
no regressions. Is this OK for trunk?

Thanks
Gui Haochen

ChangeLog
Expand: Pass down equality only flag to cmpmem expand

Targets (e.g. rs6000) can generate more efficient insn sequence if the
block compare is equality only.  This patch passes down the equality
only flags from emit_block_cmp_hints to cmpmem optab so that the target
specific expand can generate optimized insns for equality only compare.

gcc/
	* expr.cc (expand_cmpstrn_or_cmpmem): Rename to...
	(expand_cmpstrn): ...this.
	(expand_cmpmem): New function.  Pass down equality only flag to
	cmpmem expand.
	(emit_block_cmp_via_cmpmem): Add an argument for equality only
	flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
	(emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
	equality only flag.
	* expr.h (expand_cmpstrn, expand_cmpmem): Declare.
	* builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
	Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
	* config/i386/i386.md (cmpmemsi): Add the sixth operand for
	equality only flag.
	* config/rs6000/rs6000.md (cmpmemsi): Likewise.
	* config/s390/s390.md (cmpmemsi): Likewise.
	* doc/md.texi (cmpmem): Modify the document and add an operand
	for equality only flag.

patch.diff

Comments

Kewen.Lin Nov. 28, 2023, 10:03 a.m. UTC | #1
Hi Haochen,

on 2023/11/28 15:43, HAO CHEN GUI wrote:
> Hi,
>   This patch passes down the equality only flags from
> emit_block_cmp_hints to cmpmem optab so that the target specific expand
> can generate optimized insns for equality only compare. Targets
> (e.g. rs6000) can generate more efficient insn sequence if the block
> compare is equality only.

Adding more CCs since this patch changes the generic part of the code.

> 
>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
> no regressions. Is this OK for trunk?
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> Expand: Pass down equality only flag to cmpmem expand
> 
> Targets (e.g. rs6000) can generate more efficient insn sequence if the
> block compare is equality only.  This patch passes down the equality
> only flags from emit_block_cmp_hints to cmpmem optab so that the target
> specific expand can generate optimized insns for equality only compare.
> 
> gcc/
> 	* expr.cc (expand_cmpstrn_or_cmpmem): Rename to...

Maybe we could keep the function expand_cmpstrn_or_cmpmem but give it an
additional argument, e.g. int equality_only = -1, where the default of -1
means the underlying optab expansion doesn't support equality_only, and 1
and 0 stand for what you proposed.  That would avoid duplicating
expand_cmpstrn_or_cmpmem.

> 	(expand_cmpstrn): ...this.
> 	(expand_cmpmem): New function.  Pass down equality only flag to
> 	cmpmem expand.
> 	(emit_block_cmp_via_cmpmem): Add an argument for equality only
> 	flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
> 	(emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
> 	equality only flag.
> 	* expr.h (expand_cmpstrn, expand_cmpmem): Declare.
> 	* builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
> 	Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
> 	* config/i386/i386.md (cmpmemsi): Add the sixth operand for
> 	equality only flag.
> 	* config/rs6000/rs6000.md (cmpmemsi): Likewise.
> 	* config/s390/s390.md (cmpmemsi): Likewise.
> 	* doc/md.texi (cmpmem): Modify the document and add an operand
> 	for equality only flag.
> 
> patch.diff
> diff --git a/gcc/builtins.cc b/gcc/builtins.cc
> index 5ece0d23eb9..c2dbc25433d 100644
> --- a/gcc/builtins.cc
> +++ b/gcc/builtins.cc
> @@ -4819,7 +4819,7 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target)
>        if (len && !TREE_SIDE_EFFECTS (len))
>  	{
>  	  arg3_rtx = expand_normal (len);
> -	  result = expand_cmpstrn_or_cmpmem
> +	  result = expand_cmpstrn
>  	    (cmpstrn_icode, target, arg1_rtx, arg2_rtx, TREE_TYPE (len),
>  	     arg3_rtx, MIN (arg1_align, arg2_align));
>  	}
> @@ -4929,9 +4929,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target,
>    rtx arg1_rtx = get_memory_rtx (arg1, len);
>    rtx arg2_rtx = get_memory_rtx (arg2, len);
>    rtx arg3_rtx = expand_normal (len);
> -  result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx,
> -				     arg2_rtx, TREE_TYPE (len), arg3_rtx,
> -				     MIN (arg1_align, arg2_align));
> +  result = expand_cmpstrn (cmpstrn_icode, target, arg1_rtx, arg2_rtx,
> +			   TREE_TYPE (len), arg3_rtx,
> +			   MIN (arg1_align, arg2_align));
> 
>    tree fndecl = get_callee_fndecl (exp);
>    if (result)
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 1b5a794b9e5..775cba5d93d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -23195,7 +23195,8 @@ (define_expand "cmpmemsi"
>          (compare:SI (match_operand:BLK 1 "memory_operand" "")
>                      (match_operand:BLK 2 "memory_operand" "") ) )
>     (use (match_operand 3 "general_operand"))
> -   (use (match_operand 4 "immediate_operand"))]
> +   (use (match_operand 4 "immediate_operand"))
> +   (use (match_operand 5 ""))]

As the other operands have predicates, maybe the i386 folks would prefer
"const_0_to_1_operand" here.

BR,
Kewen

>    ""
>  {
>    if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 2a1b5ecfaee..e66330f876e 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -10097,7 +10097,8 @@ (define_expand "cmpmemsi"
>                 (compare:SI (match_operand:BLK 1)
>                             (match_operand:BLK 2)))
>  	      (use (match_operand:SI 3))
> -	      (use (match_operand:SI 4))])]
> +	      (use (match_operand:SI 4))
> +	      (use (match_operand:SI 5))])]
>    "TARGET_POPCNTD"
>  {
>    if (expand_block_compare (operands))
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 4bdb679daf2..506e79fb035 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -3790,7 +3790,8 @@ (define_expand "cmpmemsi"
>          (compare:SI (match_operand:BLK 1 "memory_operand" "")
>                      (match_operand:BLK 2 "memory_operand" "") ) )
>     (use (match_operand:SI 3 "general_operand" ""))
> -   (use (match_operand:SI 4 "" ""))]
> +   (use (match_operand:SI 4 "" ""))
> +   (use (match_operand:SI 5 "" ""))]
>    ""
>  {
>    if (s390_expand_cmpmem (operands[0], operands[1],
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index e01cdcbe22c..06955cd7e78 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6992,14 +6992,19 @@ result of the comparison.
> 
>  @cindex @code{cmpmem@var{m}} instruction pattern
>  @item @samp{cmpmem@var{m}}
> -Block compare instruction, with five operands like the operands
> -of @samp{cmpstr@var{m}}.  The two memory blocks specified are compared
> -byte by byte in lexicographic order starting at the beginning of each
> -block.  Unlike @samp{cmpstr@var{m}} the instruction can prefetch
> -any bytes in the two memory blocks.  Also unlike @samp{cmpstr@var{m}}
> -the comparison will not stop if both bytes are zero.  The effect of
> -the instruction is to store a value in operand 0 whose sign indicates
> -the result of the comparison.
> +Block compare instruction, with six operands.  The first five operands are
> +like the operands of @samp{cmpstr@var{m}}.  The last operand indicates
> +whether the comparison is equality or not.  Value one means it's an
> +equality only compare and zero means it's a non-equality compare.
> +
> +The two memory blocks specified are compared byte by byte in lexicographic
> +order starting at the beginning of each block.  Unlike @samp{cmpstr@var{m}}
> +the instruction can prefetch any bytes in the two memory blocks.  Also
> +unlike @samp{cmpstr@var{m}} the comparison will not stop if both bytes are
> +zero.  When last operand is zero, the effect of the instruction is to store
> +a value in operand 0 whose sign indicates the result of the comparison.
> +When last operand is one, zero in operand 0 indicates two blocks are equal.
> +All other values in operand 0 indicate two blocks are not equal.
> 
>  @cindex @code{strlen@var{m}} instruction pattern
>  @item @samp{strlen@var{m}}
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 6dd9b8f2ce6..3cdc5181bd3 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -2381,14 +2381,13 @@ emit_block_op_via_libcall (enum built_in_function fncode, rtx dst, rtx src,
>    return expand_call (call_expr, NULL_RTX, false);
>  }
> 
> -/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
> +/* Try to expand cmpstrn operation ICODE with the given operands.
>     ARG3_TYPE is the type of ARG3_RTX.  Return the result rtx on success,
>     otherwise return null.  */
> 
>  rtx
> -expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
> -			  rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
> -			  HOST_WIDE_INT align)
> +expand_cmpstrn (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
> +		tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align)
>  {
>    machine_mode insn_mode = insn_data[icode].operand[0].mode;
> 
> @@ -2407,6 +2406,34 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
>    return NULL_RTX;
>  }
> 
> +/* Similar as expand_cmpstrn, the last operand indicates whether it is a
> +   equality comparison or not.  */
> +rtx
> +expand_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
> +	       tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align,
> +	       bool equality_only)
> +{
> +  machine_mode insn_mode = insn_data[icode].operand[0].mode;
> +
> +  if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
> +    target = NULL_RTX;
> +
> +  class expand_operand ops[6];
> +  create_output_operand (&ops[0], target, insn_mode);
> +  create_fixed_operand (&ops[1], arg1_rtx);
> +  create_fixed_operand (&ops[2], arg2_rtx);
> +  create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
> +			       TYPE_UNSIGNED (arg3_type));
> +  create_integer_operand (&ops[4], align);
> +  if (equality_only)
> +    create_integer_operand (&ops[5], 1);
> +  else
> +    create_integer_operand (&ops[5], 0);
> +  if (maybe_expand_insn (icode, 6, ops))
> +    return ops[0].value;
> +  return NULL_RTX;
> +}
> +
>  /* Expand a block compare between X and Y with length LEN using the
>     cmpmem optab, placing the result in TARGET.  LEN_TYPE is the type
>     of the expression that was used to calculate the length.  ALIGN
> @@ -2414,7 +2441,7 @@ expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
> 
>  static rtx
>  emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
> -			   unsigned align)
> +			   unsigned align, bool equality_only)
>  {
>    /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
>       implementing memcmp because it will stop if it encounters two
> @@ -2424,7 +2451,8 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
>    if (icode == CODE_FOR_nothing)
>      return NULL_RTX;
> 
> -  return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, align);
> +  return expand_cmpmem (icode, target, x, y, len_type, len, align,
> +			equality_only);
>  }
> 
>  /* Emit code to compare a block Y to a block X.  This may be done with
> @@ -2469,7 +2497,8 @@ emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target,
>      result = compare_by_pieces (x, y, INTVAL (len), target, align,
>  				y_cfn, y_cfndata);
>    else
> -    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align);
> +    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align,
> +					equality_only);
> 
>    return result;
>  }
> diff --git a/gcc/expr.h b/gcc/expr.h
> index 2a172867fdb..64dbbcfcaad 100644
> --- a/gcc/expr.h
> +++ b/gcc/expr.h
> @@ -199,8 +199,9 @@ extern void use_regs (rtx *, int, int);
>  extern void use_group_regs (rtx *, rtx);
> 
>  #ifdef GCC_INSN_CODES_H
> -extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
> -				     HOST_WIDE_INT);
> +extern rtx expand_cmpstrn (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT);
> +extern rtx expand_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT,
> +			  bool);
>  #endif
> 
>  /* Write zeros through the storage of OBJECT.
Jeff Law Dec. 8, 2023, 12:05 a.m. UTC | #2
On 11/28/23 00:43, HAO CHEN GUI wrote:
> Hi,
>    This patch passes down the equality only flags from
> emit_block_cmp_hints to cmpmem optab so that the target specific expand
> can generate optimized insns for equality only compare. Targets
> (e.g. rs6000) can generate more efficient insn sequence if the block
> compare is equality only.
> 
>    Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
> no regressions. Is this OK for trunk?
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> Expand: Pass down equality only flag to cmpmem expand
> 
> Targets (e.g. rs6000) can generate more efficient insn sequence if the
> block compare is equality only.  This patch passes down the equality
> only flags from emit_block_cmp_hints to cmpmem optab so that the target
> specific expand can generate optimized insns for equality only compare.
> 
> gcc/
> 	* expr.cc (expand_cmpstrn_or_cmpmem): Rename to...
> 	(expand_cmpstrn): ...this.
> 	(expand_cmpmem): New function.  Pass down equality only flag to
> 	cmpmem expand.
> 	(emit_block_cmp_via_cmpmem): Add an argument for equality only
> 	flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
> 	(emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
> 	equality only flag.
> 	* expr.h (expand_cmpstrn, expand_cmpmem): Declare.
> 	* builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
> 	Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
> 	* config/i386/i386.md (cmpmemsi): Add the sixth operand for
> 	equality only flag.
> 	* config/rs6000/rs6000.md (cmpmemsi): Likewise.
> 	* config/s390/s390.md (cmpmemsi): Likewise.
> 	* doc/md.texi (cmpmem): Modify the document and add an operand
> 	for equality only flag.
This doesn't appear to fix a bug (no bug referenced) and I don't see a 
version of this patch posted before stage1 closed (Nov 19).  So I think 
this needs to be deferred until gcc-15.

jeff
Jeff Law July 9, 2024, 11:35 p.m. UTC | #3
On 11/28/23 12:43 AM, HAO CHEN GUI wrote:
> Hi,
>    This patch passes down the equality only flags from
> emit_block_cmp_hints to cmpmem optab so that the target specific expand
> can generate optimized insns for equality only compare. Targets
> (e.g. rs6000) can generate more efficient insn sequence if the block
> compare is equality only.
> 
>    Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
> no regressions. Is this OK for trunk?
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> Expand: Pass down equality only flag to cmpmem expand
> 
> Targets (e.g. rs6000) can generate more efficient insn sequence if the
> block compare is equality only.  This patch passes down the equality
> only flags from emit_block_cmp_hints to cmpmem optab so that the target
> specific expand can generate optimized insns for equality only compare.
> 
> gcc/
> 	* expr.cc (expand_cmpstrn_or_cmpmem): Rename to...
> 	(expand_cmpstrn): ...this.
> 	(expand_cmpmem): New function.  Pass down equality only flag to
> 	cmpmem expand.
> 	(emit_block_cmp_via_cmpmem): Add an argument for equality only
> 	flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
> 	(emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
> 	equality only flag.
> 	* expr.h (expand_cmpstrn, expand_cmpmem): Declare.
> 	* builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
> 	Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
> 	* config/i386/i386.md (cmpmemsi): Add the sixth operand for
> 	equality only flag.
> 	* config/rs6000/rs6000.md (cmpmemsi): Likewise.
> 	* config/s390/s390.md (cmpmemsi): Likewise.
> 	* doc/md.texi (cmpmem): Modify the document and add an operand
> 	for equality only flag.
Is this patch still relevant?  It was submitted after stage1 closed for 
gcc-14.  With the trunk open for development, you should probably rebase 
and repost if the patch is still relevant/useful.

Conceptually knowing that we just want to do an equality comparison 
seems useful.  I think there are other places where we track this 
information and utilize it to improve initial code generation.

jeff
HAO CHEN GUI July 10, 2024, 1:38 a.m. UTC | #4
Hi Jeff,

在 2024/7/10 7:35, Jeff Law 写道:
> Is this patch still relevant?  It was submitted after stage1 closed for gcc-14.  With the trunk open for development, you should probably rebase and repost if the patch is still relevant/useful.
> 
> Conceptually knowing that we just want to do an equality comparison seems useful.  I think there are other places where we track this information and utilize it to improve initial code generation.

This patch and its follow-up patches are on hold because I am working
on other issues. I will come back to them after completing the task at hand.

Thanks
Gui Haochen
diff mbox series

Patch

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 5ece0d23eb9..c2dbc25433d 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -4819,7 +4819,7 @@  expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target)
       if (len && !TREE_SIDE_EFFECTS (len))
 	{
 	  arg3_rtx = expand_normal (len);
-	  result = expand_cmpstrn_or_cmpmem
+	  result = expand_cmpstrn
 	    (cmpstrn_icode, target, arg1_rtx, arg2_rtx, TREE_TYPE (len),
 	     arg3_rtx, MIN (arg1_align, arg2_align));
 	}
@@ -4929,9 +4929,9 @@  expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target,
   rtx arg1_rtx = get_memory_rtx (arg1, len);
   rtx arg2_rtx = get_memory_rtx (arg2, len);
   rtx arg3_rtx = expand_normal (len);
-  result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx,
-				     arg2_rtx, TREE_TYPE (len), arg3_rtx,
-				     MIN (arg1_align, arg2_align));
+  result = expand_cmpstrn (cmpstrn_icode, target, arg1_rtx, arg2_rtx,
+			   TREE_TYPE (len), arg3_rtx,
+			   MIN (arg1_align, arg2_align));

   tree fndecl = get_callee_fndecl (exp);
   if (result)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1b5a794b9e5..775cba5d93d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23195,7 +23195,8 @@  (define_expand "cmpmemsi"
         (compare:SI (match_operand:BLK 1 "memory_operand" "")
                     (match_operand:BLK 2 "memory_operand" "") ) )
    (use (match_operand 3 "general_operand"))
-   (use (match_operand 4 "immediate_operand"))]
+   (use (match_operand 4 "immediate_operand"))
+   (use (match_operand 5 ""))]
   ""
 {
   if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2a1b5ecfaee..e66330f876e 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -10097,7 +10097,8 @@  (define_expand "cmpmemsi"
                (compare:SI (match_operand:BLK 1)
                            (match_operand:BLK 2)))
 	      (use (match_operand:SI 3))
-	      (use (match_operand:SI 4))])]
+	      (use (match_operand:SI 4))
+	      (use (match_operand:SI 5))])]
   "TARGET_POPCNTD"
 {
   if (expand_block_compare (operands))
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 4bdb679daf2..506e79fb035 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3790,7 +3790,8 @@  (define_expand "cmpmemsi"
         (compare:SI (match_operand:BLK 1 "memory_operand" "")
                     (match_operand:BLK 2 "memory_operand" "") ) )
    (use (match_operand:SI 3 "general_operand" ""))
-   (use (match_operand:SI 4 "" ""))]
+   (use (match_operand:SI 4 "" ""))
+   (use (match_operand:SI 5 "" ""))]
   ""
 {
   if (s390_expand_cmpmem (operands[0], operands[1],
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index e01cdcbe22c..06955cd7e78 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6992,14 +6992,19 @@  result of the comparison.

 @cindex @code{cmpmem@var{m}} instruction pattern
 @item @samp{cmpmem@var{m}}
-Block compare instruction, with five operands like the operands
-of @samp{cmpstr@var{m}}.  The two memory blocks specified are compared
-byte by byte in lexicographic order starting at the beginning of each
-block.  Unlike @samp{cmpstr@var{m}} the instruction can prefetch
-any bytes in the two memory blocks.  Also unlike @samp{cmpstr@var{m}}
-the comparison will not stop if both bytes are zero.  The effect of
-the instruction is to store a value in operand 0 whose sign indicates
-the result of the comparison.
+Block compare instruction, with six operands.  The first five operands are
+like the operands of @samp{cmpstr@var{m}}.  The last operand indicates
+whether the comparison is equality or not.  Value one means it's an
+equality only compare and zero means it's a non-equality compare.
+
+The two memory blocks specified are compared byte by byte in lexicographic
+order starting at the beginning of each block.  Unlike @samp{cmpstr@var{m}}
+the instruction can prefetch any bytes in the two memory blocks.  Also
+unlike @samp{cmpstr@var{m}} the comparison will not stop if both bytes are
+zero.  When last operand is zero, the effect of the instruction is to store
+a value in operand 0 whose sign indicates the result of the comparison.
+When last operand is one, zero in operand 0 indicates two blocks are equal.
+All other values in operand 0 indicate two blocks are not equal.

 @cindex @code{strlen@var{m}} instruction pattern
 @item @samp{strlen@var{m}}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 6dd9b8f2ce6..3cdc5181bd3 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -2381,14 +2381,13 @@  emit_block_op_via_libcall (enum built_in_function fncode, rtx dst, rtx src,
   return expand_call (call_expr, NULL_RTX, false);
 }

-/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
+/* Try to expand cmpstrn operation ICODE with the given operands.
    ARG3_TYPE is the type of ARG3_RTX.  Return the result rtx on success,
    otherwise return null.  */

 rtx
-expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
-			  rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
-			  HOST_WIDE_INT align)
+expand_cmpstrn (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
+		tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align)
 {
   machine_mode insn_mode = insn_data[icode].operand[0].mode;

@@ -2407,6 +2406,34 @@  expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
   return NULL_RTX;
 }

+/* Similar as expand_cmpstrn, the last operand indicates whether it is a
+   equality comparison or not.  */
+rtx
+expand_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
+	       tree arg3_type, rtx arg3_rtx, HOST_WIDE_INT align,
+	       bool equality_only)
+{
+  machine_mode insn_mode = insn_data[icode].operand[0].mode;
+
+  if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
+    target = NULL_RTX;
+
+  class expand_operand ops[6];
+  create_output_operand (&ops[0], target, insn_mode);
+  create_fixed_operand (&ops[1], arg1_rtx);
+  create_fixed_operand (&ops[2], arg2_rtx);
+  create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
+			       TYPE_UNSIGNED (arg3_type));
+  create_integer_operand (&ops[4], align);
+  if (equality_only)
+    create_integer_operand (&ops[5], 1);
+  else
+    create_integer_operand (&ops[5], 0);
+  if (maybe_expand_insn (icode, 6, ops))
+    return ops[0].value;
+  return NULL_RTX;
+}
+
 /* Expand a block compare between X and Y with length LEN using the
    cmpmem optab, placing the result in TARGET.  LEN_TYPE is the type
    of the expression that was used to calculate the length.  ALIGN
@@ -2414,7 +2441,7 @@  expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,

 static rtx
 emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
-			   unsigned align)
+			   unsigned align, bool equality_only)
 {
   /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
      implementing memcmp because it will stop if it encounters two
@@ -2424,7 +2451,8 @@  emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;

-  return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, align);
+  return expand_cmpmem (icode, target, x, y, len_type, len, align,
+			equality_only);
 }

 /* Emit code to compare a block Y to a block X.  This may be done with
@@ -2469,7 +2497,8 @@  emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target,
     result = compare_by_pieces (x, y, INTVAL (len), target, align,
 				y_cfn, y_cfndata);
   else
-    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align);
+    result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align,
+					equality_only);

   return result;
 }
diff --git a/gcc/expr.h b/gcc/expr.h
index 2a172867fdb..64dbbcfcaad 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -199,8 +199,9 @@  extern void use_regs (rtx *, int, int);
 extern void use_group_regs (rtx *, rtx);

 #ifdef GCC_INSN_CODES_H
-extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
-				     HOST_WIDE_INT);
+extern rtx expand_cmpstrn (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT);
+extern rtx expand_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx, HOST_WIDE_INT,
+			  bool);
 #endif

 /* Write zeros through the storage of OBJECT.