@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
Are you sure that -mbmi2 does not imply -mbmi?
@@ -13285,6 +13291,7 @@ put_condition_code (enum rtx_code code, enum
machine_mode mode, int reverse,
If CODE is 't', pretend the mode is V8SFmode.
If CODE is 'h', pretend the reg is the 'high' byte register.
If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+ If CODE is 'N', print the half mode high register.
If CODE is 'd', duplicate the operand for AVX instruction.
*/
On Fri, Aug 19, 2011 at 4:51 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote: >>> Updated patch is attached. Comments inline. If CODE is 'N', print the high register of a double word register pair. @@ -13294,6 +13301,15 @@ print_reg (rtx x, int code, FILE *file) const char *reg; bool duplicated = code == 'd' && TARGET_AVX; + if (code == 'N') + { + enum machine_mode mode = GET_MODE (x); + enum machine_mode half_mode = mode == TImode ? DImode : SImode; + x = simplify_gen_subreg (half_mode, x, mode, + GET_MODE_SIZE (half_mode)); + code = 0; + } + No need to check modes, we _KNOW_ that DWI expands to double word modes. Also, handling of 'N' should be put a couple of lines lower, like: code = 16; else if (code == 't') code = 32; + else if (code == 'N') + { + gcc_assert (mode == GET_MODE_WIDER_MODE (word_mode)); + x = gen_highpart (word_mode, x); + code = GET_MODE_SIZE (word_mode); + } else code = GET_MODE_SIZE (GET_MODE (x)); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e7ae397..05f7666 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md (define_c_enum "unspecv" [ @@ -751,14 +756,17 @@ ;; Base name for insn mnemonic. (define_code_attr logic [(and "and") (ior "or") (xor "xor")]) +;; Mapping of shift operators +(define_code_iterator any_shift [ashift lshiftrt ashiftrt]) + ;; Mapping of shift-right operators (define_code_iterator any_shiftrt [lshiftrt ashiftrt]) ;; Base name for define_insn -(define_code_attr shiftrt_insn [(lshiftrt "lshr") (ashiftrt "ashr")]) +(define_code_attr shift_insn [(ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")]) ;; Base name for insn mnemonic. -(define_code_attr shiftrt [(lshiftrt "shr") (ashiftrt "sar")]) +(define_code_attr shift [(ashift "shl") (lshiftrt "shr") (ashiftrt "sar")]) These renames should be part of another follow-up patch. ;; Mapping of rotate operators (define_code_iterator any_rotate [rotate rotatert]) @@ -777,6 +785,8 @@ ;; Used in signed and unsigned widening multiplications. (define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_attr any_extend [(sign_extend "SIGN_EXTEND") + (zero_extend "ZERO_EXTEND")]) No. Pattern should be splitted instead. ;; Various insn prefixes for signed and unsigned operations. (define_code_attr u [(sign_extend "") (zero_extend "u") @@ -6837,7 +6847,17 @@ (match_operand:DWIH 1 "nonimmediate_operand" "")) (any_extend:<DWI> (match_operand:DWIH 2 "register_operand" "")))) - (clobber (reg:CC FLAGS_REG))])]) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_BMI2 && <any_extend> == ZERO_EXTEND) + { + emit_insn (gen_bmi2_umul<mode><dwi>3_1 (operands[0], + operands[1], + operands[2])); + DONE; + } +}) Please split the expander instead! +;; Update pattern if BMI2 is available +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (any_shift:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "") + (subreg:QI + (match_operand:SI 2 "register_operand" "") 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && !reload_completed" + [(set (match_dup 0) + (any_shift:SWI48 (match_dup 1) (match_dup 2)))] +{ + if (can_create_pseudo_p () && <MODE>mode != SImode) + { + rtx tmp = gen_rtx_REG (<MODE>mode, 0); + emit_insn (gen_extendsidi2 (tmp, operands[2])); + operands[2] = tmp; + } +}) Why splitters? Generate the shifts directly from the expander, fixing the operands on-the-fly if necessary. Also, do not rename half of the shift expanders and insn patterns just to introduce *ONE* extra RTX... @@ -15745,8 +15763,23 @@ ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, } Don't expand RORX through ix86_expand_binary_operator, generate it directly from expander. You are complicating things with splitters too much! I will rewrite this part of i386.md. @@ -12346,6 +12422,42 @@ "xor{b}\t{%h0, %b0|%b0, %h0}" [(set_attr "length" "2") (set_attr "mode" "HI")]) + +;; BMI2 instructions. +(define_insn "bmi2_bzhi_<mode>3" Please put these into "Bit manipulation instructions." section. Also, if possible, UNSPECs should not be used. It is much better to describe pattern with generic RTXes, this way you will enable many existing optimizations that (obviously) don't know how to handle UNSPECs. +++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c @@ -0,0 +1,35 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_bzhi_u32 (unsigned a, int l) +{ + unsigned res = a; + int i; + for (i=0; i<32-l; ++i) + res &= ~(1 << (31 - i)); + + return res; +} Please add spaces around operators (also in other testcases). +/* { dg-do run { target { bmi2 && { ia32 } } } } */ +/* { dg-options "-mbmi2 -Ofast" } */ Don't use -Ofast in the testsuite. Use explicit -O2, -ffast-math, etc. Options come and go from -Ofast. +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_mul_u32 (unsigned a, unsigned b) +{ + unsigned long long res = 0; + volatile unsigned dummy = 0; + int i; + for (i=0; i<b; ++i) + res += (unsigned long long)(dummy? 0 : a); Spaces. + res = (unsigned long long)a * b; And here. + for (i=0; i<b; ++i) { And here. Please follow GNU coding standards [1]. + /* Block loop opts */ + res += (unsigned __int128)(dummy? 0 : a); + } Why? There are other ways to suppress unwanted optimizations, using volatile and asm. See many examples through the testsuite. +++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +int +calc_sarx_u32 (int a, int l) +{ + int res = a; + int i; + for (i=0; i<l; ++i) + res >>= 1; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + int src = 0xfce7ace0; + int res, res_ref; + + for (i=0; i<5; ++i) { + src = src * (i + 1); + + res_ref = calc_sarx_u32 (src, i + 1); + res = src >> (i + 1); + + printf ("%x %x\n", res_ref, res); If you _REALLY_ need debugging printfs in the test, then protect it with #ifdef DEBUG and put them just before abort (again, see many examples). But more or less, these printfs are just annoying. [1] http://www.gnu.org/prep/standards/standards.html Uros.