diff mbox series

[committed] RISC-V: NFC: Move scalar block move expansion code into riscv-string.cc

Message ID 26b66674-778a-49ad-bbf3-d25446b35814@ventanamicro.com
State New
Headers show
Series [committed] RISC-V: NFC: Move scalar block move expansion code into riscv-string.cc | expand

Commit Message

Jeff Law Oct. 16, 2023, 8:04 p.m. UTC
This just moves a few functions out of riscv.cc into riscv-string.cc in 
an attempt to keep riscv.cc manageable.  This was originally Christoph's 
code and I'm just pushing it on his behalf.

Full disclosure: I built rv64gc after changing to verify everything 
still builds.  Given it was just lifting code from one place to another, 
I didn't run the testsuite.

Jeff
commit 328745607c5d403a1c7b6bc2ecaa1574ee42122f
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Mon Oct 16 13:57:43 2023 -0600

    RISC-V: NFC: Move scalar block move expansion code into riscv-string.cc
    
    This just moves a few functions out of riscv.cc into riscv-string.cc in an
    attempt to keep riscv.cc manageable.  This was originally Christoph's code and
    I'm just pushing it on his behalf.
    
    Full disclosure: I built rv64gc after changing to verify everything still
    builds.  Given it was just lifting code from one place to another, I didn't run
    the testsuite.
    
    gcc/
            * config/riscv/riscv-protos.h (riscv_expand_block_move): Remove
            redundant prototype.  Improve comment.
            * config/riscv/riscv.cc (riscv_block_move_straight): Move from riscv.cc
            into riscv-string.cc.
            (riscv_adjust_block_mem, riscv_block_move_loop): Likewise.
            (riscv_expand_block_move): Likewise.
            * config/riscv/riscv-string.cc (riscv_block_move_straight): Add moved
            function.
            (riscv_adjust_block_mem, riscv_block_move_loop): Likewise.
            (riscv_expand_block_move): Likewise.
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 49bdcdf2f93..6190faab501 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -117,7 +117,6 @@  extern rtx riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y);
 extern bool riscv_expand_conditional_move (rtx, rtx, rtx, rtx);
 extern rtx riscv_legitimize_call_address (rtx);
 extern void riscv_set_return_address (rtx, rtx);
-extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern rtx riscv_return_addr (int, rtx);
 extern poly_int64 riscv_initial_elimination_offset (int, int);
 extern void riscv_expand_prologue (void);
@@ -125,7 +124,6 @@  extern void riscv_expand_epilogue (int);
 extern bool riscv_epilogue_uses (unsigned int);
 extern bool riscv_can_use_return_insn (void);
 extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode);
-extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
 extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
 extern bool riscv_gpr_save_operation_p (rtx);
@@ -160,6 +158,9 @@  extern bool riscv_hard_regno_rename_ok (unsigned, unsigned);
 rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 
+/* Routines implemented in riscv-string.cc.  */
+extern bool riscv_expand_block_move (rtx, rtx, rtx);
+
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
   /* This CPU's canonical name.  */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 2bdff0374e8..0b4606aa7b2 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -592,3 +592,158 @@  riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
 
   return false;
 }
+
+/* Emit straight-line code to copy LENGTH bytes from SRC to DEST.  All full
+   chunks are loaded before any is stored.  Assume no overlap.  */
+
+static void
+riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
+{
+  unsigned HOST_WIDE_INT offset, delta;
+  unsigned HOST_WIDE_INT bits;
+  int i;
+  enum machine_mode mode;
+  rtx *regs;
+
+  bits = MAX (BITS_PER_UNIT,
+	      MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))));
+
+  mode = mode_for_size (bits, MODE_INT, 0).require ();
+  delta = bits / BITS_PER_UNIT;
+
+  /* Allocate one temporary-register slot per full DELTA-byte chunk.  */
+  regs = XALLOCAVEC (rtx, length / delta);
+
+  /* Load as many BITS-sized chunks as possible, each into its own REGS
+     entry.  BITS was derived above from the alignment of SRC and DEST.  */
+  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+    {
+      regs[i] = gen_reg_rtx (mode);
+      riscv_emit_move (regs[i], adjust_address (src, mode, offset));
+    }
+
+  /* Store the loaded chunks to the destination in the same order.  */
+  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+    riscv_emit_move (adjust_address (dest, mode, offset), regs[i]);
+
+  /* Hand the remaining LENGTH - OFFSET tail bytes to move_by_pieces.  */
+  if (offset < length)
+    {
+      src = adjust_address (src, BLKmode, offset);
+      dest = adjust_address (dest, BLKmode, offset);
+      move_by_pieces (dest, src, length - offset,
+		      MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
+    }
+}
+
+/* Helper function for doing a loop-based block operation on memory
+   reference MEM.  Each iteration of the loop will operate on LENGTH
+   bytes of MEM.
+
+   Copy the address of MEM into a new base register for use within the
+   loop and create a BLKmode memory reference addressed by that register.
+   Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+
+static void
+riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
+			rtx *loop_reg, rtx *loop_mem)
+{
+  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+  /* The new mem no longer refers to a known location, but it still
+     keeps MEM's alignment, capped at LENGTH bytes.  */
+  *loop_mem = change_address (mem, BLKmode, *loop_reg);
+  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+   bytes at a time; any LENGTH % BYTES_PER_ITER tail is copied after the loop.
+   LENGTH must be at least BYTES_PER_ITER.  Assume no overlap.  */
+
+static void
+riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+		       unsigned HOST_WIDE_INT bytes_per_iter)
+{
+  rtx label, src_reg, dest_reg, final_src, test;
+  unsigned HOST_WIDE_INT leftover;
+
+  leftover = length % bytes_per_iter;
+  length -= leftover;
+
+  /* Create the base registers and memory references used by the loop.  */
+  riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+  riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+
+  /* Compute SRC_REG's value after the last iteration; LENGTH no longer
+     includes the leftover bytes at this point.  */
+  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+				   0, 0, OPTAB_WIDEN);
+
+  /* Emit the label that marks the top of the loop.  */
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  /* Emit the loop body: one BYTES_PER_ITER-byte straight-line copy.  */
+  riscv_block_move_straight (dest, src, bytes_per_iter);
+
+  /* Advance both base registers to the next block.  */
+  riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
+  riscv_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
+
+  /* Branch back to LABEL while SRC_REG differs from FINAL_SRC.  */
+  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+  emit_jump_insn (gen_cbranch4 (Pmode, test, src_reg, final_src, label));
+
+  /* Copy the LEFTOVER tail bytes after the loop, or emit a nop if none.  */
+  if (leftover)
+    riscv_block_move_straight (dest, src, leftover);
+  else
+    emit_insn(gen_nop ());
+}
+
+/* Expand a cpymemsi instruction, which copies LENGTH bytes from memory
+   reference SRC to memory reference DEST.  Return true if expanded.  */
+
+bool
+riscv_expand_block_move (rtx dest, rtx src, rtx length)
+{
+  if (CONST_INT_P (length))
+    {
+      unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
+      unsigned HOST_WIDE_INT factor, align;
+
+      align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
+      factor = BITS_PER_WORD / align;
+
+      if (optimize_function_for_size_p (cfun)
+	  && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false))
+	return false;
+
+      if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor))
+	{
+	  riscv_block_move_straight (dest, src, INTVAL (length));
+	  return true;
+	}
+      else if (optimize && align >= BITS_PER_WORD)
+	{
+	  unsigned min_iter_words
+	    = RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD;
+	  unsigned iter_words = min_iter_words;
+	  unsigned HOST_WIDE_INT bytes = hwi_length;
+	  unsigned HOST_WIDE_INT words = bytes / UNITS_PER_WORD;
+
+	  /* Prefer a longer loop body when body + tail words do not grow.  */
+	  for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++)
+	    {
+	      unsigned cur_cost = iter_words + words % iter_words;
+	      unsigned new_cost = i + words % i;
+	      if (new_cost <= cur_cost)
+		iter_words = i;
+	    }
+
+	  riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD);
+	  return true;
+	}
+    }
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 44746256f61..f2dcb0db6fb 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5147,161 +5147,6 @@  riscv_legitimize_call_address (rtx addr)
   return addr;
 }
 
-/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
-   Assume that the areas do not overlap.  */
-
-static void
-riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
-{
-  unsigned HOST_WIDE_INT offset, delta;
-  unsigned HOST_WIDE_INT bits;
-  int i;
-  enum machine_mode mode;
-  rtx *regs;
-
-  bits = MAX (BITS_PER_UNIT,
-	      MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))));
-
-  mode = mode_for_size (bits, MODE_INT, 0).require ();
-  delta = bits / BITS_PER_UNIT;
-
-  /* Allocate a buffer for the temporary registers.  */
-  regs = XALLOCAVEC (rtx, length / delta);
-
-  /* Load as many BITS-sized chunks as possible.  Use a normal load if
-     the source has enough alignment, otherwise use left/right pairs.  */
-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
-    {
-      regs[i] = gen_reg_rtx (mode);
-      riscv_emit_move (regs[i], adjust_address (src, mode, offset));
-    }
-
-  /* Copy the chunks to the destination.  */
-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
-    riscv_emit_move (adjust_address (dest, mode, offset), regs[i]);
-
-  /* Mop up any left-over bytes.  */
-  if (offset < length)
-    {
-      src = adjust_address (src, BLKmode, offset);
-      dest = adjust_address (dest, BLKmode, offset);
-      move_by_pieces (dest, src, length - offset,
-		      MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
-    }
-}
-
-/* Helper function for doing a loop-based block operation on memory
-   reference MEM.  Each iteration of the loop will operate on LENGTH
-   bytes of MEM.
-
-   Create a new base register for use within the loop and point it to
-   the start of MEM.  Create a new memory reference that uses this
-   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
-
-static void
-riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
-			rtx *loop_reg, rtx *loop_mem)
-{
-  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
-
-  /* Although the new mem does not refer to a known location,
-     it does keep up to LENGTH bytes of alignment.  */
-  *loop_mem = change_address (mem, BLKmode, *loop_reg);
-  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
-}
-
-/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
-   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
-   the memory regions do not overlap.  */
-
-static void
-riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
-		       unsigned HOST_WIDE_INT bytes_per_iter)
-{
-  rtx label, src_reg, dest_reg, final_src, test;
-  unsigned HOST_WIDE_INT leftover;
-
-  leftover = length % bytes_per_iter;
-  length -= leftover;
-
-  /* Create registers and memory references for use within the loop.  */
-  riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
-  riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
-
-  /* Calculate the value that SRC_REG should have after the last iteration
-     of the loop.  */
-  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
-				   0, 0, OPTAB_WIDEN);
-
-  /* Emit the start of the loop.  */
-  label = gen_label_rtx ();
-  emit_label (label);
-
-  /* Emit the loop body.  */
-  riscv_block_move_straight (dest, src, bytes_per_iter);
-
-  /* Move on to the next block.  */
-  riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
-  riscv_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
-
-  /* Emit the loop condition.  */
-  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
-  emit_jump_insn (gen_cbranch4 (Pmode, test, src_reg, final_src, label));
-
-  /* Mop up any left-over bytes.  */
-  if (leftover)
-    riscv_block_move_straight (dest, src, leftover);
-  else
-    emit_insn(gen_nop ());
-}
-
-/* Expand a cpymemsi instruction, which copies LENGTH bytes from
-   memory reference SRC to memory reference DEST.  */
-
-bool
-riscv_expand_block_move (rtx dest, rtx src, rtx length)
-{
-  if (CONST_INT_P (length))
-    {
-      unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
-      unsigned HOST_WIDE_INT factor, align;
-
-      align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
-      factor = BITS_PER_WORD / align;
-
-      if (optimize_function_for_size_p (cfun)
-	  && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false))
-	return false;
-
-      if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor))
-	{
-	  riscv_block_move_straight (dest, src, INTVAL (length));
-	  return true;
-	}
-      else if (optimize && align >= BITS_PER_WORD)
-	{
-	  unsigned min_iter_words
-	    = RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD;
-	  unsigned iter_words = min_iter_words;
-	  unsigned HOST_WIDE_INT bytes = hwi_length;
-	  unsigned HOST_WIDE_INT words = bytes / UNITS_PER_WORD;
-
-	  /* Lengthen the loop body if it shortens the tail.  */
-	  for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++)
-	    {
-	      unsigned cur_cost = iter_words + words % iter_words;
-	      unsigned new_cost = i + words % i;
-	      if (new_cost <= cur_cost)
-		iter_words = i;
-	    }
-
-	  riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD);
-	  return true;
-	}
-    }
-  return false;
-}
-
 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
    in context CONTEXT.  HI_RELOC indicates a high-part reloc.  */