Add AVG and UAVG rtx codes

Message ID	4DFA2A71.7030402@codesourcery.com
State	New
Headers	show Return-Path: <gcc-patches-return-294585-incoming=patchwork.ozlabs.org@gcc.gnu.org> Message-ID: <4DFA2A71.7030402@codesourcery.com> Date: Thu, 16 Jun 2011 18:08:17 +0200 From: Bernd Schmidt <bernds@codesourcery.com> User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.17) Gecko/20110516 Lightning/1.0b3pre Thunderbird/3.1.10 MIME-Version: 1.0 To: GCC Patches <gcc-patches@gcc.gnu.org> Subject: Add AVG and UAVG rtx codes Content-Type: multipart/mixed; boundary="------------080600040606040101090608" Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

Index: gcc/doc/rtl.texi =================================================================== --- gcc/doc/rtl.texi (revision 174339) +++ gcc/doc/rtl.texi (working copy) @@ -2385,6 +2385,18 @@ Represents the absolute value of @var{x} @code{ss_abs} ensures that an out-of-bounds result saturates to the maximum signed value. +@findex avg +@cindex average +@findex uavg +@cindex unsigned average +@item (avg:@var{m} @var{x} @var{y}) +@itemx (uavg:@var{m} @var{x} @var{y}) +Represents an averaging operation. Two integer values are added, the +constant 1 is added to the result, and the whole is shifted right by one +to produce the result. The result has the same mode as the inputs, but +the operation uses intermediate values which are one bit wider. The +inputs are sign-extended to that wider precision for @code{avg}, +zero-extended for @code{uavg}. @findex sqrt @cindex square root Index: gcc/rtl.def =================================================================== --- gcc/rtl.def (revision 174339) +++ gcc/rtl.def (working copy) @@ -674,6 +674,15 @@ DEF_RTL_EXPR(VEC_CONCAT, "vec_concat", " an integer multiple of the number of input parts. */ DEF_RTL_EXPR(VEC_DUPLICATE, "vec_duplicate", "e", RTX_UNARY) +/* Describes an operation that averages two integer values by adding + them together, adding 1, and shifting the result right by 1. The + result is as large as the inputs, but the operation uses + intermediate values with a precision one bit wider. For AVG, the + input values are sign-extended to that wider precision; for UAVG, + they are zero-extended. 
*/ +DEF_RTL_EXPR(AVG, "avg", "ee", RTX_COMM_ARITH) +DEF_RTL_EXPR(UAVG, "uavg", "ee", RTX_COMM_ARITH) + /* Addition with signed saturation */ DEF_RTL_EXPR(SS_PLUS, "ss_plus", "ee", RTX_COMM_ARITH) Index: gcc/simplify-rtx.c =================================================================== --- gcc/simplify-rtx.c (revision 174339) +++ gcc/simplify-rtx.c (working copy) @@ -2962,6 +2962,8 @@ simplify_binary_operation_1 (enum rtx_co case US_MULT: case SS_DIV: case US_DIV: + case AVG: + case UAVG: /* ??? There are simplifications that can be done. */ return 0; @@ -3671,6 +3673,8 @@ simplify_const_binary_operation (enum rt case US_DIV: case SS_ASHIFT: case US_ASHIFT: + case AVG: + case UAVG: /* ??? There are simplifications that can be done. */ return 0; Index: gcc/config/i386/mmx.md =================================================================== --- gcc/config/i386/mmx.md (revision 174339) +++ gcc/config/i386/mmx.md (working copy) @@ -1460,37 +1460,15 @@ (define_expand "vec_initv8qi" (define_expand "mmx_uavgv8qi3" [(set (match_operand:V8QI 0 "register_operand" "") - (truncate:V8QI - (lshiftrt:V8HI - (plus:V8HI - (plus:V8HI - (zero_extend:V8HI - (match_operand:V8QI 1 "nonimmediate_operand" "")) - (zero_extend:V8HI - (match_operand:V8QI 2 "nonimmediate_operand" ""))) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "") + (match_operand:V8QI 2 "nonimmediate_operand" "")))] "TARGET_SSE || TARGET_3DNOW" "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);") (define_insn "*mmx_uavgv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (truncate:V8QI - (lshiftrt:V8HI - (plus:V8HI - (plus:V8HI - (zero_extend:V8HI - (match_operand:V8QI 1 "nonimmediate_operand" "%0")) - (zero_extend:V8HI - (match_operand:V8QI 2 "nonimmediate_operand" "ym"))) - (const_vector:V8HI [(const_int 1) (const_int 1) 
- (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "(TARGET_SSE || TARGET_3DNOW) && ix86_binary_operator_ok (PLUS, V8QImode, operands)" { @@ -1511,33 +1489,15 @@ (define_insn "*mmx_uavgv8qi3" (define_expand "mmx_uavgv4hi3" [(set (match_operand:V4HI 0 "register_operand" "") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (plus:V4SI - (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "")) - (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" ""))) - (const_vector:V4SI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") + (match_operand:V4HI 2 "nonimmediate_operand" "")))] "TARGET_SSE || TARGET_3DNOW_A" "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);") (define_insn "*mmx_uavgv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (truncate:V4HI - (lshiftrt:V4SI - (plus:V4SI - (plus:V4SI - (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) - (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) - (const_vector:V4SI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "(TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (PLUS, V4HImode, operands)" "pavgw\t{%2, %0|%0, %2}" Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md (revision 174339) +++ gcc/config/i386/sse.md (working copy) @@ -6734,45 +6734,15 @@ (define_expand "vec_unpacku_hi_<mode>" (define_expand "sse2_uavgv16qi3" [(set (match_operand:V16QI 0 "register_operand" "") - (truncate:V16QI - (lshiftrt:V16HI - (plus:V16HI - 
(plus:V16HI - (zero_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "")) - (zero_extend:V16HI - (match_operand:V16QI 2 "nonimmediate_operand" ""))) - (const_vector:V16QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") + (match_operand:V16QI 2 "nonimmediate_operand" "")))] "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);") (define_insn "*sse2_uavgv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x,x") - (truncate:V16QI - (lshiftrt:V16HI - (plus:V16HI - (plus:V16HI - (zero_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")) - (zero_extend:V16HI - (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))) - (const_vector:V16QI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0,x") + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)" "@ pavgb\t{%2, %0|%0, %2} @@ -6785,37 +6755,15 @@ (define_insn "*sse2_uavgv16qi3" (define_expand "sse2_uavgv8hi3" [(set (match_operand:V8HI 0 "register_operand" "") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (plus:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" ""))) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V8HI 
(match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")))] "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);") (define_insn "*sse2_uavgv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (truncate:V8HI - (lshiftrt:V8SI - (plus:V8SI - (plus:V8SI - (zero_extend:V8SI - (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")) - (zero_extend:V8SI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) - (const_vector:V8HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] + (uavg:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)" "@ pavgw\t{%2, %0|%0, %2}

Add AVG and UAVG rtx codes

Commit Message

Comments

Patch