diff mbox

[v2,for-2.5,11/12] tcg/optimize: do not remember garbage high bits for 32-bit ops

Message ID 1437994568-7825-12-git-send-email-aurelien@aurel32.net
State New
Headers show

Commit Message

Aurelien Jarno July 27, 2015, 10:56 a.m. UTC
Now that we have real size changing ops, we don't need to mark high
bits of the destination as garbage. The goal of the optimizer is to
predict the value of the temps (and not of the registers) and do
simplifications when possible. The problem there is therefore not the
fact that those bits are not counted as garbage, but that a size
changing op is replaced by a move.

This patch is basically a revert of 24666baf, including the changes that
have been made since then.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/optimize.c | 41 +++++++++++------------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

Comments

Aurelien Jarno July 29, 2015, 4:34 p.m. UTC | #1
On 2015-07-27 12:56, Aurelien Jarno wrote:
> Now that we have real size changing ops, we don't need to mark high
> bits of the destination as garbage. The goal of the optimizer is to
> predict the value of the temps (and not of the registers) and do
> simplifications when possible. The problem there is therefore not the
> fact that those bits are not counted as garbage, but that a size
> changing op is replaced by a move.
> 
> This patch is basically a revert of 24666baf, including the changes that
> have been made since then.
> 
> Cc: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
>  tcg/optimize.c | 41 +++++++++++------------------------------
>  1 file changed, 11 insertions(+), 30 deletions(-)
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 8f33755..166074e 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -937,17 +922,13 @@ void tcg_optimize(TCGContext *s)
>              break;
>          }
>  
> -        /* 32-bit ops generate 32-bit results.  For the result is zero test
> -           below, we can ignore high bits, but for further optimizations we
> -           need to record that the high bits contain garbage.  */
> -        partmask = mask;
> +        /* 32-bit ops generate 32-bit results.  */
>          if (!(def->flags & TCG_OPF_64BIT)) {
> -            mask |= ~(tcg_target_ulong)0xffffffffu;
> -            partmask &= 0xffffffffu;
> +            mask &= 0xffffffffu;
>              affected &= 0xffffffffu;
>          }
>  
> -        if (partmask == 0) {
> +        if (mask == 0) {
>              assert(nb_oargs == 1);
>              tcg_opt_gen_movi(s, op, args, args[0], 0);
>              continue;

Answering to myself, this actually doesn't work as the current code
wrongly assumes that all ops writing a 64-bit temp will have the
TCG_OPF_64BIT flag set. This is wrong for at least call.

I still haven't decided what is the best way to fix that, either by
special casing these ops, or by actually looking at the temp type. I
guess performances will decide. In early version of this patchset, I
tried to access the temp type at other places, and it has some
performances impact.

Aurelien
diff mbox

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8f33755..166074e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -203,20 +203,12 @@  static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                              TCGArg dst, TCGArg val)
 {
-    TCGOpcode new_op = op_to_movi(op->opc);
-    tcg_target_ulong mask;
-
-    op->opc = new_op;
+    op->opc = op_to_movi(op->opc);
 
     reset_temp(dst);
     temps[dst].is_const = true;
     temps[dst].val = val;
-    mask = val;
-    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
-        /* High bits of the destination are now garbage.  */
-        mask |= ~0xffffffffull;
-    }
-    temps[dst].mask = mask;
+    temps[dst].mask = val;
 
     args[0] = dst;
     args[1] = val;
@@ -230,28 +222,21 @@  static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
         return;
     }
 
-    TCGOpcode new_op = op_to_mov(op->opc);
-    tcg_target_ulong mask;
-
-    op->opc = new_op;
+    op->opc = op_to_mov(op->opc);
 
     reset_temp(dst);
-    mask = temps[src].mask;
-    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
-        /* High bits of the destination are now garbage.  */
-        mask |= ~0xffffffffull;
-    }
-    temps[dst].mask = mask;
 
     if (s->temps[src].type == s->temps[dst].type) {
         temps[dst].next_copy = temps[src].next_copy;
         temps[dst].prev_copy = src;
         temps[temps[dst].next_copy].prev_copy = dst;
         temps[src].next_copy = dst;
-        temps[dst].is_const = temps[src].is_const;
-        temps[dst].val = temps[src].val;
     }
 
+    temps[dst].is_const = temps[src].is_const;
+    temps[dst].val = temps[src].val;
+    temps[dst].mask = temps[src].mask;
+
     args[0] = dst;
     args[1] = src;
 }
@@ -584,7 +569,7 @@  void tcg_optimize(TCGContext *s)
     reset_all_temps(nb_temps);
 
     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
-        tcg_target_ulong mask, partmask, affected;
+        tcg_target_ulong mask, affected;
         int nb_oargs, nb_iargs, i;
         TCGArg tmp;
 
@@ -937,17 +922,13 @@  void tcg_optimize(TCGContext *s)
             break;
         }
 
-        /* 32-bit ops generate 32-bit results.  For the result is zero test
-           below, we can ignore high bits, but for further optimizations we
-           need to record that the high bits contain garbage.  */
-        partmask = mask;
+        /* 32-bit ops generate 32-bit results.  */
         if (!(def->flags & TCG_OPF_64BIT)) {
-            mask |= ~(tcg_target_ulong)0xffffffffu;
-            partmask &= 0xffffffffu;
+            mask &= 0xffffffffu;
             affected &= 0xffffffffu;
         }
 
-        if (partmask == 0) {
+        if (mask == 0) {
             assert(nb_oargs == 1);
             tcg_opt_gen_movi(s, op, args, args[0], 0);
             continue;