@@ -129,6 +129,7 @@ extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
extern rtx ix86_find_base_term (rtx);
extern bool ix86_check_movabs (rtx, int);
+extern void ix86_split_idivmod (enum rtx_code, enum machine_mode, rtx[]);
extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
extern int ix86_attr_length_immediate_default (rtx, int);
@@ -1985,6 +1985,7 @@ static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static void ix86_add_new_builtins (int);
static rtx ix86_expand_vec_perm_builtin (tree);
static tree ix86_canonical_va_list_type (tree);
+static void predict_jump (int);
enum ix86_function_specific_strings
{
@@ -3703,6 +3704,9 @@ override_options (bool main_args_p)
#endif
}
+ if (flag_8bit_idiv < 0)
+ flag_8bit_idiv = 0;
+
/* Save the initial options in case the user does function specific options */
if (main_args_p)
target_option_default_node = target_option_current_node
@@ -14651,6 +14655,93 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
emit_move_insn (operands[0], dst);
}
+/* Split 32bit divmod (CODE is DIV if signed, otherwise unsigned) to use 8bit
+ unsigned divmod at run time if dividend and divisor are within [0-255]. */
+
+void
+ix86_split_idivmod (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx end_label, qimode_label;
+ rtx insn, div, mod;
+ rtx scratch, tmp0, tmp1, tmp2;
+ rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case SImode:
+ gen_divmod4_1 = code == DIV ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+ break;
+ default:
+ gcc_unreachable (); /* Only SImode is handled. */
+ }
+
+ end_label = gen_label_rtx ();
+ qimode_label = gen_label_rtx ();
+
+ scratch = gen_reg_rtx (mode);
+
+ /* Use 8bit unsigned divmod if dividend and divisor are both within the
+ range [0-255], i.e. their inclusive OR has no bit set above bit 7. */
+ emit_move_insn (scratch, operands[2]);
+ scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
+ scratch, 1, OPTAB_DIRECT);
+ emit_insn (gen_testsi_ccno_1 (scratch, GEN_INT (-0x100)));
+ tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
+ tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
+ gen_rtx_LABEL_REF (VOIDmode, qimode_label),
+ pc_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
+ predict_jump (REG_BR_PROB_BASE * 50 / 100); /* 50% probability. */
+ JUMP_LABEL (insn) = qimode_label;
+
+ /* Generate original signed/unsigned divmod. */
+ div = (*gen_divmod4_1) (operands[0], operands[1],
+ operands[2], operands[3]);
+ emit_insn (div);
+
+ /* Branch to the end. */
+ emit_jump_insn (gen_jump (end_label));
+ emit_barrier ();
+
+ /* Generate 8bit unsigned divide; its HImode result goes to SCRATCH. */
+ emit_label (qimode_label);
+ tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
+ tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
+ tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
+ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
+
+ if (code == DIV)
+ {
+ div = gen_rtx_DIV (SImode, operands[2], operands[3]);
+ mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
+ }
+ else
+ {
+ div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
+ mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
+ }
+
+ /* Zero extend quotient from AL into operands[0]. */
+ tmp1 = gen_lowpart (QImode, tmp0);
+ insn = emit_insn (gen_zero_extendqisi2 (operands[0], tmp1));
+ set_unique_reg_note (insn, REG_EQUAL, div);
+
+ /* Extract remainder from AH into operands[1]. */
+ tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
+ if (REG_P (operands[1]))
+ insn = emit_move_insn (operands[1], tmp1);
+ else
+ {
+ emit_move_insn (scratch, tmp1); /* Quotient was already copied out. */
+ insn = emit_move_insn (operands[1], scratch);
+ }
+ set_unique_reg_note (insn, REG_EQUAL, mod);
+
+ emit_label (end_label);
+}
+
#define LEA_SEARCH_THRESHOLD 12
/* Search backward for non-agu definition of register number REGNO1
@@ -7306,13 +7306,76 @@
""
"")
-(define_insn_and_split "*divmod<mode>4"
+(define_insn "*divmod<mode>4"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
- (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+;; Split SImode signed divmod with 8bit unsigned divide (flag_8bit_idiv set):
+;; if (dividend and divisor are in [0-255])
+;; use 8bit unsigned integer divide
+;; else
+;; use original integer divide
+(define_split
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "<MODE>mode == SImode
+ && flag_8bit_idiv
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (DIV, <MODE>mode, operands); DONE;")
+
+(define_split
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
(set (match_operand:SWIM248 1 "register_operand" "=&d")
(mod:SWIM248 (match_dup 2) (match_dup 3)))
(clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 1)
+ (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:SWIM248 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+ if (<MODE>mode != HImode
+ && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[1], operands[2]);
+ operands[4] = operands[1];
+ }
+})
+
+(define_insn_and_split "divmod<mode>4_1"
+ [(set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (mod:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
""
"#"
"reload_completed"
@@ -7365,13 +7428,62 @@
""
"")
-(define_insn_and_split "*udivmod<mode>4"
+(define_insn "*udivmod<mode>4"
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+;; Split SImode unsigned divmod with 8bit unsigned divide (flag_8bit_idiv set):
+;; if (dividend and divisor are in [0-255])
+;; use 8bit unsigned integer divide
+;; else
+;; use original integer divide
+(define_split
 [(set (match_operand:SWIM248 0 "register_operand" "=a")
 (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
 (set (match_operand:SWIM248 1 "register_operand" "=&d")
 (umod:SWIM248 (match_dup 2) (match_dup 3)))
 (clobber (reg:CC FLAGS_REG))]
+ "<MODE>mode == SImode
+ && flag_8bit_idiv
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (UDIV, <MODE>mode, operands); DONE;")
+
+(define_split
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:SWIM248 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn_and_split "udivmod<mode>4_1"
+ [(set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (umod:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
""
"#"
"reload_completed"
@@ -388,3 +388,7 @@ Support F16C built-in functions and code generation
mfentry
Target Report Var(flag_fentry) Init(-1)
Emit profiling counter call at function entry before prologue.
+
+m8bit-idiv
+Target Report Var(flag_8bit_idiv) Init(-1) Save
+Expand 32bit integer divide into control flow with 8bit unsigned integer divide
@@ -602,7 +602,7 @@ Objective-C and Objective-C++ Dialects}.
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
-mcmodel=@var{code-model} -mabi=@var{name} @gol
-m32 -m64 -mlarge-data-threshold=@var{num} @gol
--msse2avx -mfentry}
+-msse2avx -mfentry -m8bit-idiv}
@emph{IA-64 Options}
@gccoptlist{-mbig-endian -mlittle-endian -mgnu-as -mgnu-ld -mno-pic @gol
@@ -12647,6 +12647,13 @@ If profiling is active @option{-pg} put the profiling
counter call before prologue.
Note: On x86 architectures the attribute @code{ms_hook_prologue}
isn't possible at the moment for @option{-mfentry} and @option{-pg}.
+
+@item -m8bit-idiv
+@itemx -mno-8bit-idiv
+@opindex 8bit-idiv
+This option makes GCC expand 32-bit integer divide into a run-time check that
+uses 8-bit unsigned integer divide when dividend and divisor are in [0, 255].
+
@end table
These @samp{-m} switches are supported in addition to the above
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (int x, int y, int q, int r)
+{
+ if ((x / y) != q || (x % y) != r) /* Q and R are the expected results. */
+ abort ();
+}
+
+int
+main ()
+{
+ test (7, 6, 1, 1); /* Both operands in [0, 255]. */
+ test (-7, -6, 1, -1); /* Negative operands are outside [0, 255]. */
+ test (-7, 6, -1, -1); /* Only the dividend is negative. */
+ test (7, -6, -1, 1); /* Only the divisor is negative. */
+ test (255, 254, 1, 1); /* Largest dividend still in [0, 255]. */
+ test (256, 254, 1, 2); /* Dividend just above 255. */
+ test (256, 256, 1, 0); /* Both operands just above 255. */
+ test (254, 256, 0, 254); /* Divisor just above 255. */
+ test (254, 255, 0, 254); /* Largest divisor still in [0, 255]. */
+ test (254, 1, 254, 0); /* Quotient equals the dividend. */
+ test (255, 2, 127, 1); /* Non-zero quotient and remainder. */
+ test (1, 256, 0, 1); /* Small dividend, divisor above 255. */
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+int
+foo (int x, int y)
+{
+ return x / y; /* The assembler scans below expect one divb and one idivl. */
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+int
+foo (int x, int y)
+{
+ return x % y; /* The assembler scans below expect one divb and one idivl. */
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (int x, int y, int q, int r)
+{
+ if ((x / y) != q || (x % y) != r) /* Scans expect one divb, one idivl. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (int x, int y, int q, int r)
+{
+ if ((x / y) != q || (x % y) != r) /* Scans expect no divb, one idivl. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-not "divb" } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+ if ((x / y) != q || (x % y) != r) /* Q and R are the expected results. */
+ abort ();
+}
+
+int
+main ()
+{
+ test (7, 6, 1, 1); /* Both operands in [0, 255]. */
+ test (255, 254, 1, 1); /* Largest dividend still in [0, 255]. */
+ test (256, 254, 1, 2); /* Dividend just above 255. */
+ test (256, 256, 1, 0); /* Both operands just above 255. */
+ test (254, 256, 0, 254); /* Divisor just above 255. */
+ test (254, 255, 0, 254); /* Largest divisor still in [0, 255]. */
+ test (254, 1, 254, 0); /* Quotient equals the dividend. */
+ test (255, 2, 127, 1); /* Non-zero quotient and remainder. */
+ test (1, 256, 0, 1); /* Small dividend, divisor above 255. */
+ test (0x80000000, 0x7fffffff, 1, 1); /* Dividend has bit 31 set. */
+ test (0x7fffffff, 0x80000000, 0, 0x7fffffff); /* Divisor has bit 31 set. */
+ test (0x80000000, 0x80000003, 0, 0x80000000); /* Both have bit 31 set. */
+ test (0xfffffffd, 0xfffffffe, 0, 0xfffffffd); /* Near the 32bit maximum. */
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+unsigned int
+foo (unsigned int x, unsigned int y)
+{
+ return x / y; /* The assembler scans below expect one divb and one divl. */
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+unsigned int
+foo (unsigned int x, unsigned int y)
+{
+ return x % y; /* The assembler scans below expect one divb and one divl. */
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+ if ((x / y) != q || (x % y) != r) /* Scans expect one divb, one divl. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+ if ((x / y) != q || (x % y) != r) /* Scans expect no divb, one divl. */
+ abort ();
+}
+
+/* { dg-final { scan-assembler-not "divb" } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (int, int, int, int, int, int);
+
+void
+bar (int x, int y)
+{
+ foo (0, 0, 0, 0, x / y, x % y); /* Scans expect one divb and one divl. */
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mtune=atom" } */
+/* { dg-options "-O2 -m8bit-idiv" } */
unsigned char
foo (unsigned char x, unsigned char y)
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mtune=atom" } */
+/* { dg-options "-O2 -m8bit-idiv" } */
extern unsigned char z;
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mtune=atom" } */
+/* { dg-options "-O2 -m8bit-idiv" } */
extern void abort (void);
extern void exit (int);