@@ -129,6 +129,7 @@ extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
extern rtx ix86_find_base_term (rtx);
extern bool ix86_check_movabs (rtx, int);
+extern void ix86_split_idivmod (enum machine_mode, rtx[], bool);
extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
extern int ix86_attr_length_immediate_default (rtx, int);
@@ -1985,6 +1985,7 @@ static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static void ix86_add_new_builtins (int);
static rtx ix86_expand_vec_perm_builtin (tree);
static tree ix86_canonical_va_list_type (tree);
+static void predict_jump (int);
enum ix86_function_specific_strings
{
@@ -2629,6 +2630,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{ "-msseregparm", MASK_SSEREGPARM },
{ "-mstack-arg-probe", MASK_STACK_PROBE },
{ "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
+ { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
};
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
@@ -14651,6 +14653,107 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
emit_move_insn (operands[0], dst);
}
+/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
+ divisor are within the the range [0-255]. */
+
+void
+ix86_split_idivmod (enum machine_mode mode, rtx operands[],
+ bool signed_p)
+{
+ rtx end_label, qimode_label;
+ rtx insn, div, mod;
+ rtx scratch, tmp0, tmp1, tmp2;
+ rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
+ rtx (*gen_zero_extend) (rtx, rtx);
+ rtx (*gen_test_ccno_1) (rtx, rtx);
+
+ switch (mode)
+ {
+ case SImode:
+ gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+ gen_test_ccno_1 = gen_testsi_ccno_1;
+ gen_zero_extend = gen_zero_extendqisi2;
+ break;
+ case DImode:
+ gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
+ gen_test_ccno_1 = gen_testdi_ccno_1;
+ gen_zero_extend = gen_zero_extendqidi2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ end_label = gen_label_rtx ();
+ qimode_label = gen_label_rtx ();
+
+ scratch = gen_reg_rtx (mode);
+
+ /* Use 8bit unsigned divimod if dividend and divisor are within the
+ the range [0-255]. */
+ emit_move_insn (scratch, operands[2]);
+ scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
+ scratch, 1, OPTAB_DIRECT);
+ emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
+ tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
+ tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
+ gen_rtx_LABEL_REF (VOIDmode, qimode_label),
+ pc_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ JUMP_LABEL (insn) = qimode_label;
+
+ /* Generate original signed/unsigned divimod. */
+ div = gen_divmod4_1 (operands[0], operands[1],
+ operands[2], operands[3]);
+ emit_insn (div);
+
+ /* Branch to the end. */
+ emit_jump_insn (gen_jump (end_label));
+ emit_barrier ();
+
+ /* Generate 8bit unsigned divide. */
+ emit_label (qimode_label);
+ /* Don't use operands[0] for result of 8bit divide since not all
+ registers support QImode ZERO_EXTRACT. */
+ tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
+ tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
+ tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
+ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
+
+ if (signed_p)
+ {
+ div = gen_rtx_DIV (SImode, operands[2], operands[3]);
+ mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
+ }
+ else
+ {
+ div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
+ mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
+ }
+
+ /* Extract remainder from AH. */
+ tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
+ if (REG_P (operands[1]))
+ insn = emit_move_insn (operands[1], tmp1);
+ else
+ {
+ /* Need a new scratch register since the old one has result
+ of 8bit divide. */
+ scratch = gen_reg_rtx (mode);
+ emit_move_insn (scratch, tmp1);
+ insn = emit_move_insn (operands[1], scratch);
+ }
+ set_unique_reg_note (insn, REG_EQUAL, mod);
+
+ /* Zero extend quotient from AL. */
+ tmp1 = gen_lowpart (QImode, tmp0);
+ insn = emit_insn (gen_zero_extend (operands[0], tmp1));
+ set_unique_reg_note (insn, REG_EQUAL, div);
+
+ emit_label (end_label);
+}
+
#define LEA_SEARCH_THRESHOLD 12
/* Search backward for non-agu definition of register number REGNO1
@@ -7341,6 +7341,59 @@
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+;; Split with 8bit unsigned divide:
+;; if (dividend an divisor are in [0-255])
+;; use 8bit unsigned integer divide
+;; else
+;; use original integer divide
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (div:SWI48 (match_operand:SWI48 2 "register_operand" "")
+ (match_operand:SWI48 3 "nonimmediate_operand" "")))
+ (set (match_operand:SWI48 1 "register_operand" "")
+ (mod:SWI48 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+
+(define_insn_and_split "divmod<mode>4_1"
+ [(set (match_operand:SWI48 1 "register_operand" "=&d")
+ (mod:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+ (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWI48 0 "register_operand" "=a")
+ (div:SWI48 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 1)
+ (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:SWI48 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (mod:SWI48 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+ if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[1], operands[2]);
+ operands[4] = operands[1];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*divmod<mode>4_noext"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7386,6 +7439,46 @@
[(set_attr "type" "multi")
(set_attr "mode" "<MODE>")])
+;; Split with 8bit unsigned divide:
+;; if (dividend an divisor are in [0-255])
+;; use 8bit unsigned integer divide
+;; else
+;; use original integer divide
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "")
+ (match_operand:SWI48 3 "nonimmediate_operand" "")))
+ (set (match_operand:SWI48 1 "register_operand" "")
+ (umod:SWI48 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
+
+(define_insn_and_split "udivmod<mode>4_1"
+ [(set (match_operand:SWI48 1 "register_operand" "=&d")
+ (umod:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+ (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWI48 0 "register_operand" "=a")
+ (udiv:SWI48 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:SWI48 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (umod:SWI48 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*udivmod<mode>4_noext"
[(set (match_operand:SWIM248 0 "register_operand" "=a")
(udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7440,6 +7533,15 @@
""
"")
+(define_expand "testdi_ccno_1"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:DI (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "x86_64_szext_general_operand" ""))
+ (const_int 0)))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "")
+
(define_insn "*testdi_1"
[(set (reg FLAGS_REG)
(compare
@@ -388,3 +388,7 @@ Support F16C built-in functions and code generation
mfentry
Target Report Var(flag_fentry) Init(-1)
Emit profiling counter call at function entry before prologue.
+
+m8bit-idiv
+Target Report Mask(USE_8BIT_IDIV) Save
+Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
@@ -602,7 +602,7 @@ Objective-C and Objective-C++ Dialects}.
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
-mcmodel=@var{code-model} -mabi=@var{name} @gol
-m32 -m64 -mlarge-data-threshold=@var{num} @gol
--msse2avx -mfentry}
+-msse2avx -mfentry -m8bit-idiv}
@emph{IA-64 Options}
@gccoptlist{-mbig-endian -mlittle-endian -mgnu-as -mgnu-ld -mno-pic @gol
@@ -12647,6 +12647,16 @@ If profiling is active @option{-pg} put the profiling
counter call before prologue.
Note: On x86 architectures the attribute @code{ms_hook_prologue}
isn't possible at the moment for @option{-mfentry} and @option{-pg}.
+
+@item -m8bit-idiv
+@itemx -mno-8bit-idiv
+@opindex 8bit-idiv
+On some processors, like Intel Atom, 8bit unsigned integer divide is
+much faster than 32bit/64bit integer divide. This option will generate a
+runt-time check. If both dividend and divisor are within range of 0
+to 255, 8bit unsigned integer divide will be used instead of
+32bit/64bit integer divide.
+
@end table
These @samp{-m} switches are supported in addition to the above
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (int x, int y, int q, int r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+int
+main ()
+{
+ test (7, 6, 1, 1);
+ test (-7, -6, 1, -1);
+ test (-7, 6, -1, -1);
+ test (7, -6, -1, 1);
+ test (255, 254, 1, 1);
+ test (256, 254, 1, 2);
+ test (256, 256, 1, 0);
+ test (254, 256, 0, 254);
+ test (254, 255, 0, 254);
+ test (254, 1, 254, 0);
+ test (255, 2, 127, 1);
+ test (1, 256, 0, 1);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+int
+foo (int x, int y)
+{
+ return x / y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+int
+foo (int x, int y)
+{
+ return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (int x, int y, int q, int r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (int x, int y, int q, int r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+/* { dg-final { scan-assembler-not "divb" } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (int, int, int, int, int, int);
+
+void
+bar (int x, int y)
+{
+ foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (long long x, long long y, long long q, long long r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+int
+main ()
+{
+ test (7, 6, 1, 1);
+ test (-7, -6, 1, -1);
+ test (-7, 6, -1, -1);
+ test (7, -6, -1, 1);
+ test (255, 254, 1, 1);
+ test (256, 254, 1, 2);
+ test (256, 256, 1, 0);
+ test (254, 256, 0, 254);
+ test (254, 255, 0, 254);
+ test (254, 1, 254, 0);
+ test (255, 2, 127, 1);
+ test (1, 256, 0, 1);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+/* { dg-require-effective-target lp64 } */
+
+extern void abort (void);
+
+void
+test (long long x, long long y, long long q, long long r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivq" 1 } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (long long, long long, long long, long long,
+ long long, long long);
+
+void
+bar (long long x, long long y)
+{
+ foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivq" 1 } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+int
+main ()
+{
+ test (7, 6, 1, 1);
+ test (255, 254, 1, 1);
+ test (256, 254, 1, 2);
+ test (256, 256, 1, 0);
+ test (254, 256, 0, 254);
+ test (254, 255, 0, 254);
+ test (254, 1, 254, 0);
+ test (255, 2, 127, 1);
+ test (1, 256, 0, 1);
+ test (0x80000000, 0x7fffffff, 1, 1);
+ test (0x7fffffff, 0x80000000, 0, 0x7fffffff);
+ test (0x80000000, 0x80000003, 0, 0x80000000);
+ test (0xfffffffd, 0xfffffffe, 0, 0xfffffffd);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+unsigned int
+foo (unsigned int x, unsigned int y)
+{
+ return x / y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+unsigned int
+foo (unsigned int x, unsigned int y)
+{
+ return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+/* { dg-final { scan-assembler-not "divb" } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (unsigned int, unsigned int, unsigned int,
+ unsigned int, unsigned int, unsigned int);
+
+void
+bar (unsigned int x, unsigned int y)
+{
+ foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (unsigned long long x, unsigned long long y,
+ unsigned long long q, unsigned long long r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+int
+main ()
+{
+ test (7, 6, 1, 1);
+ test (255, 254, 1, 1);
+ test (256, 254, 1, 2);
+ test (256, 256, 1, 0);
+ test (254, 256, 0, 254);
+ test (254, 255, 0, 254);
+ test (254, 1, 254, 0);
+ test (255, 2, 127, 1);
+ test (1, 256, 0, 1);
+ test (0x80000000, 0x7fffffff, 1, 1);
+ test (0x7fffffff, 0x80000000, 0, 0x7fffffff);
+ test (0x80000000, 0x80000003, 0, 0x80000000);
+ test (0xfffffffd, 0xfffffffe, 0, 0xfffffffd);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+/* { dg-require-effective-target lp64 } */
+
+extern void abort (void);
+
+void
+test (unsigned long long x, unsigned long long y,
+ unsigned long long q, unsigned long long r)
+{
+ if ((x / y) != q || (x % y) != r)
+ abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divq" 1 } } */
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (unsigned long long, unsigned long long,
+ unsigned long long, unsigned long long,
+ unsigned long long, unsigned long long);
+
+void
+bar (unsigned long long x, unsigned long long y)
+{
+ foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divq" 1 } } */