diff mbox series

[RFC/RFA,v5,05/12] i386: Implement new expander for efficient CRC computation.

Message ID CAE65F3OO=6r3W99msWZLhg+RLXYXAs1ssnvYKejNg4HihxaMoA@mail.gmail.com
State New
Headers show
Series CRC optimization. | expand

Commit Message

Mariam Arutunian Oct. 18, 2024, 3:01 p.m. UTC
This patch introduces two new expanders for the i386 backend,
dedicated to generating optimized code for CRC computations.
The new expanders are designed to leverage specific hardware capabilities
to achieve faster CRC calculations,
particularly using the pclmulqdq or crc32 instructions when supported by
the target architecture.

Expander 1: Bit-Forward CRC (crc<SWI124dup:mode><SWI124:mode>4)
For 64-bit targets (TARGET_64BIT) that support the pclmulqdq instruction
(TARGET_PCLMUL), the expander generates code that uses pclmulqdq for the
CRC computation.

Expander 2: Bit-Reversed CRC (crc_rev<SWI124dup:mode><SWI124:mode>4)
The expander first checks whether the target supports the CRC32 instruction
set (TARGET_CRC32) and whether the polynomial in use is 0x1EDC6F41 (iSCSI).
If both conditions are met, it emits the corresponding crc32 instruction
(crc32b, crc32w, or crc32l, depending on the data size).
Otherwise, if the target is 64-bit and supports pclmulqdq, it uses the
pclmulqdq instruction for the bit-reversed CRC computation.

Otherwise, a table-based CRC is generated.

  gcc/config/i386/

    * i386-protos.h (ix86_expand_crc_using_pclmul): New extern function
declaration.
    (ix86_expand_reversed_crc_using_pclmul):  Likewise.
    * i386.cc (ix86_expand_crc_using_pclmul): New function.
    (ix86_expand_reversed_crc_using_pclmul):  Likewise.
    * i386.md (UNSPEC_CRC, UNSPEC_CRC_REV):  New unspecs.
    (SWI124dup): New iterator.
    (crc<SWI124dup:mode><SWI124:mode>4): New expander for bit-forward CRC.
    (crc_rev<SWI124dup:mode><SWI124:mode>4): New expander for reversed CRC.

  gcc/testsuite/gcc.target/i386/

    * crc-crc32-data16.c: New test.
    * crc-crc32-data32.c: Likewise.
    * crc-crc32-data8.c: Likewise.
    * crc-1-pclmul.c: Likewise.
    * crc-10-pclmul.c: Likewise.
    * crc-12-pclmul.c: Likewise.
    * crc-13-pclmul.c: Likewise.
    * crc-14-pclmul.c: Likewise.
    * crc-17-pclmul.c: Likewise.
    * crc-18-pclmul.c: Likewise.
    * crc-21-pclmul.c: Likewise.
    * crc-22-pclmul.c: Likewise.
    * crc-23-pclmul.c: Likewise.
    * crc-4-pclmul.c: Likewise.
    * crc-5-pclmul.c: Likewise.
    * crc-6-pclmul.c: Likewise.
    * crc-7-pclmul.c: Likewise.
    * crc-8-pclmul.c: Likewise.
    * crc-9-pclmul.c: Likewise.
    * crc-CCIT-data16-pclmul.c: Likewise.
    * crc-CCIT-data8-pclmul.c: Likewise.
    * crc-coremark-16bitdata-pclmul.c: Likewise.

Signed-off-by: Mariam Arutunian <mariamarutunian@gmail.com>
Mentored-by: Jeff Law <jlaw@ventanamicro.com>
diff mbox series

Patch

---
 gcc/config/i386/i386-protos.h                 |   2 +
 gcc/config/i386/i386.cc                       | 129 ++++++++++++++++++
 gcc/config/i386/i386.md                       |  59 ++++++++
 gcc/testsuite/gcc.target/i386/crc-1-pclmul.c  |   8 ++
 gcc/testsuite/gcc.target/i386/crc-10-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-12-pclmul.c |   9 ++
 gcc/testsuite/gcc.target/i386/crc-13-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-14-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-17-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-18-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-21-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-22-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-23-pclmul.c |   8 ++
 gcc/testsuite/gcc.target/i386/crc-4-pclmul.c  |   8 ++
 gcc/testsuite/gcc.target/i386/crc-5-pclmul.c  |   9 ++
 gcc/testsuite/gcc.target/i386/crc-6-pclmul.c  |   8 ++
 gcc/testsuite/gcc.target/i386/crc-7-pclmul.c  |   8 ++
 gcc/testsuite/gcc.target/i386/crc-8-pclmul.c  |   8 ++
 gcc/testsuite/gcc.target/i386/crc-9-pclmul.c  |   8 ++
 .../gcc.target/i386/crc-CCIT-data16-pclmul.c  |   9 ++
 .../gcc.target/i386/crc-CCIT-data8-pclmul.c   |   9 ++
 .../i386/crc-coremark-16bitdata-pclmul.c      |   9 ++
 .../gcc.target/i386/crc-crc32-data16.c        |  53 +++++++
 .../gcc.target/i386/crc-crc32-data32.c        |  53 +++++++
 .../gcc.target/i386/crc-crc32-data8.c         |  53 +++++++
 25 files changed, 506 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/crc-crc32-data8.c

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3a7bc949e56..9190788b9a7 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -303,6 +303,8 @@  extern bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
 extern bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, rtx,
 						 rtx, int);
 extern bool ix86_extract_perm_from_pool_constant (int*, rtx);
+extern void ix86_expand_crc_using_pclmul (rtx *);
+extern void ix86_expand_reversed_crc_using_pclmul (rtx *);
 
 /* In i386-c.cc  */
 extern void ix86_target_macros (void);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 707b75a6d5d..559af97c595 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26280,6 +26280,135 @@  ix86_run_selftests (void)
 
 } // namespace selftest
 
+/* Expand a bit-forward CRC computation using the pclmulqdq instruction.
+   OPERANDS[0] is the register that receives the calculated CRC,
+   OPERANDS[1] is input CRC,
+   OPERANDS[2] is data (message),
+   OPERANDS[3] is the polynomial without the leading 1 (a CONST_INT).
+   The CRC mode must not be wider than 32 bits.
+   NOTE(review): in this patch the definition sits ahead of the #endif that
+   closes the CHECKING_P selftest section, so it appears to be compiled only
+   when CHECKING_P is set; it probably belongs after that #endif.  */
+
+void
+ix86_expand_crc_using_pclmul (rtx *operands)
+{
+  /* Check and keep arguments.  */
+  gcc_assert (!CONST_INT_P (operands[0]));
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx crc = operands[1];
+  rtx data = operands[2];
+  unsigned HOST_WIDE_INT crc_size = GET_MODE_BITSIZE (GET_MODE (operands[0]));
+  gcc_assert (crc_size <= 32);
+  unsigned HOST_WIDE_INT data_size = GET_MODE_BITSIZE (GET_MODE (data));
+
+  /* Calculate the quotient.  */
+  unsigned HOST_WIDE_INT
+      q = gf2n_poly_long_div_quotient (UINTVAL (operands[3]), crc_size);
+
+  if (crc_size > data_size)
+    crc = expand_shift (RSHIFT_EXPR, DImode, crc, crc_size - data_size,
+			NULL_RTX, 1);
+
+  /* Keep the quotient in V2DImode.  */
+  rtx q_v2di = gen_reg_rtx (V2DImode);
+  rtx quotient = gen_reg_rtx (DImode);
+  convert_move (quotient, gen_int_mode (q, DImode), 0);
+  emit_insn (gen_vec_concatv2di (q_v2di, quotient, const0_rtx));
+
+  /* crc ^ data and keep in V2DImode.  */
+  rtx cd_xor = expand_binop (DImode, xor_optab, crc, data, NULL_RTX, 1,
+			     OPTAB_WIDEN);
+  rtx res = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (res, cd_xor, const0_rtx));
+  /* Perform carry-less multiplication.  */
+  emit_insn (gen_pclmulqdq (res, res, q_v2di, gen_int_mode (0, DImode)));
+
+  res = expand_shift (RSHIFT_EXPR, V2DImode, res, crc_size, NULL_RTX, 0);
+
+  /* Keep the polynomial in V2DImode.  */
+  rtx polynomial = gen_reg_rtx (DImode);
+  convert_move (polynomial, operands[3], 0);
+  rtx p_v2di = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (p_v2di, polynomial, const0_rtx));
+
+  /* Perform carry-less multiplication and get low part.  */
+  emit_insn (gen_pclmulqdq (res, res, p_v2di, gen_int_mode (0, DImode)));
+  rtx crc_part = gen_reg_rtx (DImode);
+  emit_insn (gen_vec_extractv2didi (crc_part, res, const0_rtx));
+
+  if (crc_size > data_size)
+    {
+      rtx shift = expand_shift (LSHIFT_EXPR, DImode, operands[1], data_size,
+				NULL_RTX, 1);
+      crc_part = expand_binop (DImode, xor_optab, crc_part, shift, NULL_RTX, 1,
+			       OPTAB_DIRECT);
+    }
+  /* Only the low CRC-mode part of CRC_PART is stored in the result.  */
+  emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), crc_part));
+}
+
+/* Expand a bit-reversed (reflected) CRC computation using pclmulqdq.
+   OPERANDS[0] receives the calculated CRC, OPERANDS[1] is the input CRC,
+   OPERANDS[2] is the data (message) and OPERANDS[3] is the polynomial
+   without the leading 1, which must be a CONST_INT.  */
+
+void
+ix86_expand_reversed_crc_using_pclmul (rtx *operands)
+{
+  /* Validate the operands and record the CRC and data widths.  */
+  gcc_assert (!CONST_INT_P (operands[0]));
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx crc_in = operands[1];
+  rtx msg = operands[2];
+  unsigned HOST_WIDE_INT crc_bits = GET_MODE_BITSIZE (GET_MODE (operands[0]));
+  gcc_assert (crc_bits <= 32);
+  unsigned HOST_WIDE_INT msg_bits = GET_MODE_BITSIZE (GET_MODE (msg));
+
+  /* Host-side constants: reflected quotient and reflected polynomial.  */
+  unsigned HOST_WIDE_INT quot_refl
+    = gf2n_poly_long_div_quotient (UINTVAL (operands[3]), crc_bits);
+  quot_refl = reflect_hwi (quot_refl, crc_bits + 1);
+  unsigned HOST_WIDE_INT poly_refl
+    = reflect_hwi (UINTVAL (operands[3]), crc_bits);
+
+  /* Keep the reflected quotient in V2DImode.  */
+  rtx quot_vec = gen_reg_rtx (V2DImode);
+  rtx quot_reg = gen_reg_rtx (DImode);
+  convert_move (quot_reg, gen_int_mode (quot_refl, DImode), 0);
+  emit_insn (gen_vec_concatv2di (quot_vec, quot_reg, const0_rtx));
+
+  /* XOR the input CRC with the data.  */
+  rtx mixed = expand_binop (DImode, xor_optab, crc_in, msg, NULL_RTX, 1,
+			    OPTAB_WIDEN);
+
+  /* First carry-less multiplication: (crc ^ data) times the quotient.  */
+  rtx acc = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (acc, mixed, const0_rtx));
+  emit_insn (gen_pclmulqdq (acc, acc, quot_vec, gen_int_mode (0, DImode)));
+
+  acc = expand_shift (LSHIFT_EXPR, V2DImode, acc, 64 - msg_bits, NULL_RTX, 0);
+
+  /* Keep the reflected polynomial, shifted left by one, in V2DImode.  */
+  rtx poly_reg = gen_reg_rtx (DImode);
+  convert_move (poly_reg, gen_int_mode (poly_refl << 1, DImode), 0);
+  rtx poly_vec = gen_reg_rtx (V2DImode);
+  emit_insn (gen_vec_concatv2di (poly_vec, poly_reg, const0_rtx));
+
+  /* Second carry-less multiplication; extract element 1 of the product.  */
+  emit_insn (gen_pclmulqdq (acc, acc, poly_vec, gen_int_mode (0, DImode)));
+  rtx hi_half = gen_reg_rtx (DImode);
+  emit_insn (gen_vec_extractv2didi (hi_half, acc, const1_rtx));
+
+  if (crc_bits > msg_bits)
+    {
+      rtx crc_rest = expand_shift (RSHIFT_EXPR, DImode, crc_in, msg_bits,
+				   NULL_RTX, 1);
+      hi_half = expand_binop (DImode, xor_optab, hi_half, crc_rest, NULL_RTX,
+			      1, OPTAB_DIRECT);
+    }
+  emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), hi_half));
+}
 #endif /* CHECKING_P */
 
 static const scoped_attribute_specs *const ix86_attribute_table[] =
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0fae3c1eb87..429f00422a1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -191,6 +191,10 @@ 
   ;; For CRC32 support
   UNSPEC_CRC32
 
+  ;; For CRC support
+  UNSPEC_CRC
+  UNSPEC_CRC_REV
+
   ;; For LZCNT suppoprt
   UNSPEC_LZCNT
 
@@ -28605,6 +28609,61 @@ 
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "DI")])
 
+
+;; Copy of SWI124, so the CRC and data modes can be iterated independently.
+(define_mode_iterator SWI124dup [QI HI SI])
+
+;; Bit-forward CRC 8, 16, 32; needs TARGET_PCLMUL and TARGET_64BIT.
+(define_expand "crc<SWI124dup:mode><SWI124:mode>4"
+	;; return value (calculated CRC)
+  [(set (match_operand:SWI124 0 "register_operand" "=r")
+		      ;; initial CRC
+	(unspec:SWI124 [(match_operand:SWI124 1 "register_operand" "r")
+		      ;; data
+		      (match_operand:SWI124dup 2 "register_operand" "r")
+		      ;; polynomial without leading 1
+		      (match_operand:SWI124 3)]
+		      UNSPEC_CRC))]
+  /* The case when data's size is bigger than CRC's size is not supported.  */
+  "TARGET_PCLMUL && TARGET_64BIT && <SWI124:MODE>mode >= <SWI124dup:MODE>mode"
+{
+   ix86_expand_crc_using_pclmul (operands);
+   DONE;
+})
+
+;; Reversed CRC 8, 16, 32: crc32 insn, pclmulqdq (64-bit) or table-based.
+(define_expand "crc_rev<SWI124dup:mode><SWI124:mode>4"
+	;; return value (calculated CRC)
+  [(set (match_operand:SWI124 0 "register_operand" "=r")
+		      ;; initial CRC
+	(unspec:SWI124 [(match_operand:SWI124 1 "register_operand" "r")
+		      ;; data
+		      (match_operand:SWI124dup 2 "register_operand" "r")
+		      ;; polynomial without leading 1
+		      (match_operand:SWI124 3)]
+		      UNSPEC_CRC_REV))]
+  /* The case when data's size is bigger than CRC's size is not supported.  */
+  "((TARGET_PCLMUL && TARGET_64BIT) || TARGET_CRC32)
+    && <SWI124:MODE>mode >= <SWI124dup:MODE>mode"
+{
+  /* If it is iSCSI polynomial (0x1EDC6F41), generate crc32 instruction.  */
+  if (TARGET_CRC32 && INTVAL (operands[3]) == 0x1EDC6F41)
+    {
+      rtx crc_part = gen_reg_rtx (SImode);
+      emit_insn (gen_sse4_2_crc32<SWI124dup:mode> (crc_part, operands[1],
+						   operands[2]));
+      emit_move_insn (operands[0],
+		      gen_lowpart (GET_MODE (operands[0]), crc_part));
+    }
+  else if (TARGET_PCLMUL && TARGET_64BIT)
+    ix86_expand_reversed_crc_using_pclmul (operands);
+  else
+    expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
+				     operands[3], GET_MODE (operands[2]),
+				     generate_reflecting_code_standard);
+  DONE;
+})
+
 (define_insn "rdpmc"
   [(set (match_operand:DI 0 "register_operand" "=A")
   	(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
diff --git a/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
new file mode 100644
index 00000000000..21edf417f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-1-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
+
+#include "../../gcc.dg/torture/crc-1.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
new file mode 100644
index 00000000000..39bfd386d9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-10-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-10.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
new file mode 100644
index 00000000000..1ac9a6bf56d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-12-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc -fdisable-tree-phiopt2 -fdisable-tree-phiopt3" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-12.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
new file mode 100644
index 00000000000..c290539fad3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-13-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-13.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
new file mode 100644
index 00000000000..cc62ee471c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-14-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-14.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
new file mode 100644
index 00000000000..660bff55e1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-17-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-17.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
new file mode 100644
index 00000000000..e54392b32a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-18-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-18.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
new file mode 100644
index 00000000000..4db1504cd49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-21-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-21.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
new file mode 100644
index 00000000000..d0f27d96069
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-22-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-22.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
new file mode 100644
index 00000000000..666f3620597
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-23-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-23.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
new file mode 100644
index 00000000000..398ecaa4aba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-4-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-4.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
new file mode 100644
index 00000000000..b80368e81bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-5-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -w -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-5.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
new file mode 100644
index 00000000000..20b851e3596
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-6-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-6.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
new file mode 100644
index 00000000000..3e5cc75338e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-7-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-7.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
new file mode 100644
index 00000000000..4b1145553fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-8-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-8.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
new file mode 100644
index 00000000000..204d6c84d47
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-9-pclmul.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-options "-mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+
+#include "../../gcc.dg/torture/crc-9.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
new file mode 100644
index 00000000000..ca728120858
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-CCIT-data16-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-CCIT-data16.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
new file mode 100644
index 00000000000..816e0561d8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-CCIT-data8-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+
+#include "../../gcc.dg/torture/crc-CCIT-data8.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c b/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
new file mode 100644
index 00000000000..817d960b0aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-coremark-16bitdata-pclmul.c
@@ -0,0 +1,9 @@ 
+/* { dg-do run } */
+/* { dg-options "-w -mpclmul -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include "../../gcc.dg/torture/crc-coremark16-data16.c"
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "pclmulqdq" "dfinish"} } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
new file mode 100644
index 00000000000..49ab5f31ef0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data16.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0)))  /* Unoptimized reference.  */
+uint32_t _crc32_O0 (uint32_t crc, uint16_t data) {
+  int i;
+  crc = crc ^ data;
+
+  for (i = 0; i < 8; i++) {  /* NOTE(review): 8 steps, 16-bit data.  */
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;  /* 0x1EDC6F41 bit-reflected.  */
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint16_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Presumably the copy the dg-final scans expect to become UNSPEC_CRC32.  */
+  for (i = 0; i < 8; i++) {  /* NOTE(review): 8 steps, 16-bit data.  */
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;  /* 0x1EDC6F41 bit-reflected.  */
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+int main ()
+{
+  uint32_t crc = 0x0D800D80;
+  for (uint16_t i = 0; i < 0xffff; i++)  /* Data values 0 .. 0xfffe.  */
+    {
+      uint32_t res1 = _crc32_O0 (crc, i);
+      uint32_t res2 = _crc32 (crc, i);
+      if (res1 != res2)  /* Both versions must agree.  */
+	abort ();
+      crc = res1;  /* The next round starts from this CRC.  */
+    }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
new file mode 100644
index 00000000000..08d6c193a77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data32.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0)))  /* Unoptimized reference.  */
+uint32_t _crc32_O0 (uint32_t crc, uint32_t data) {
+  int i;
+  crc = crc ^ data;
+
+  for (i = 0; i < 32; i++) {  /* One step per data bit.  */
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;  /* 0x1EDC6F41 bit-reflected.  */
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint32_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Presumably the copy the dg-final scans expect to become UNSPEC_CRC32.  */
+  for (i = 0; i < 32; i++) {  /* One step per data bit.  */
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;  /* 0x1EDC6F41 bit-reflected.  */
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+int main ()
+{
+  uint32_t crc = 0x0D800D80;
+  for (uint8_t i = 0; i < 0xff; i++)  /* Only data values 0 .. 0xfe.  */
+    {
+      uint32_t res1 = _crc32_O0 (crc, i);
+      uint32_t res2 = _crc32 (crc, i);
+      if (res1 != res2)  /* Both versions must agree.  */
+	abort ();
+      crc = res1;  /* The next round starts from this CRC.  */
+    }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
diff --git a/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c b/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c
new file mode 100644
index 00000000000..7a76b27fd28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/crc-crc32-data8.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run } */
+/* { dg-options "-mcrc32 -O2 -fdump-rtl-dfinish -fdump-tree-crc" } */
+/* { dg-skip-if "" { *-*-* } { "-flto"} } */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+__attribute__ ((noinline,optimize(0)))  /* Unoptimized reference.  */
+uint32_t _crc32_O0 (uint32_t crc, uint8_t data) {
+  int i;
+  crc = crc ^ data;
+
+  for (i = 0; i < 8; i++) {  /* One step per data bit.  */
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;  /* 0x1EDC6F41 bit-reflected.  */
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+uint32_t _crc32 (uint32_t crc, uint8_t data) {
+  int i;
+  crc = crc ^ data;
+  /* Presumably the copy the dg-final scans expect to become UNSPEC_CRC32.  */
+  for (i = 0; i < 8; i++) {  /* One step per data bit.  */
+      if (crc & 1)
+	crc = (crc >> 1) ^ 0x82F63B78;  /* 0x1EDC6F41 bit-reflected.  */
+      else
+	crc = (crc >> 1);
+    }
+
+  return crc;
+}
+
+int main ()
+{
+  uint32_t crc = 0x0D800D80;
+  for (uint8_t i = 0; i < 0xff; i++)  /* Data values 0 .. 0xfe.  */
+    {
+      uint32_t res1 = _crc32_O0 (crc, i);
+      uint32_t res2 = _crc32 (crc, i);
+      if (res1 != res2)  /* Both versions must agree.  */
+	abort ();
+      crc = res1;  /* The next round starts from this CRC.  */
+    }
+}
+
+/* { dg-final { scan-tree-dump "calculates CRC!" "crc"} } */
+/* { dg-final { scan-tree-dump-times "Couldn't generate faster CRC code." 0 "crc"} } */
+/* { dg-final { scan-rtl-dump "UNSPEC_CRC32" "dfinish"} } */
+/* { dg-final { scan-rtl-dump-times "pclmulqdq" 0 "dfinish"} } */
-- 
2.25.1