
[for-2.5,07/10] tcg: implement real ext_i32_i64 and extu_i32_i64 ops

Message ID 1437755447-10537-8-git-send-email-aurelien@aurel32.net
State New

Commit Message

Aurelien Jarno July 24, 2015, 4:30 p.m. UTC
Implement real ext_i32_i64 and extu_i32_i64 ops. They ensure that a
32-bit value is always converted to a 64-bit value and not propagated
through the register allocator or the optimizer.
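
For context, a minimal sketch of how a target frontend might use these ops;
the helper and variable names below are illustrative and not part of this patch:

    #include "tcg-op.h"

    /* Hypothetical frontend helper; dst/src names are illustrative only. */
    static void gen_widen_i32_to_i64(TCGv_i64 dst, TCGv_i32 src, bool is_signed)
    {
        if (is_signed) {
            tcg_gen_ext_i32_i64(dst, src);    /* emits the new ext_i32_i64 op */
        } else {
            tcg_gen_extu_i32_i64(dst, src);   /* emits the new extu_i32_i64 op */
        }
    }

With this patch, on 64-bit hosts these calls emit an explicit size-changing op
instead of aliasing the i32 temporary to an i64 and reusing ext32s_i64 or
ext32u_i64.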

Cc: Andrzej Zaborowski <balrogg@gmail.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Blue Swirl <blauwirbel@gmail.com>
Cc: Claudio Fontana <claudio.fontana@huawei.com>
Cc: Claudio Fontana <claudio.fontana@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/aarch64/tcg-target.c |  4 ++++
 tcg/i386/tcg-target.c    |  5 +++++
 tcg/ia64/tcg-target.c    |  4 ++++
 tcg/ppc/tcg-target.c     |  6 ++++++
 tcg/s390/tcg-target.c    |  5 +++++
 tcg/sparc/tcg-target.c   |  8 ++++++--
 tcg/tcg-op.c             | 10 ++++------
 tcg/tcg-opc.h            |  3 +++
 tcg/tci/tcg-target.c     |  4 ++++
 tci.c                    |  6 ++++--
 10 files changed, 45 insertions(+), 10 deletions(-)

Comments

Alex Bennée July 31, 2015, 4:01 p.m. UTC | #1
Aurelien Jarno <aurelien@aurel32.net> writes:

> Implement real ext_i32_i64 and extu_i32_i64 ops. They ensure that a
> 32-bit value is always converted to a 64-bit value and not propagated
> through the register allocator or the optimizer.
>
> Cc: Andrzej Zaborowski <balrogg@gmail.com>
> Cc: Alexander Graf <agraf@suse.de>
> Cc: Blue Swirl <blauwirbel@gmail.com>
> Cc: Claudio Fontana <claudio.fontana@huawei.com>
> Cc: Claudio Fontana <claudio.fontana@gmail.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Cc: Stefan Weil <sw@weilnetz.de>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
>  tcg/aarch64/tcg-target.c |  4 ++++
>  tcg/i386/tcg-target.c    |  5 +++++
>  tcg/ia64/tcg-target.c    |  4 ++++
>  tcg/ppc/tcg-target.c     |  6 ++++++
>  tcg/s390/tcg-target.c    |  5 +++++
>  tcg/sparc/tcg-target.c   |  8 ++++++--
>  tcg/tcg-op.c             | 10 ++++------
>  tcg/tcg-opc.h            |  3 +++
>  tcg/tci/tcg-target.c     |  4 ++++
>  tci.c                    |  6 ++++--
>  10 files changed, 45 insertions(+), 10 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index b7ec4f5..7f7ab7e 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -1556,6 +1556,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_ext16s_i32:
>          tcg_out_sxt(s, ext, MO_16, a0, a1);
>          break;
> +    case INDEX_op_ext_i32_i64:
>      case INDEX_op_ext32s_i64:
>          tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
>          break;
> @@ -1567,6 +1568,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_ext16u_i32:
>          tcg_out_uxt(s, MO_16, a0, a1);
>          break;
> +    case INDEX_op_extu_i32_i64:
>      case INDEX_op_ext32u_i64:
>          tcg_out_movr(s, TCG_TYPE_I32, a0, a1);

So what is the difference between extu_i32_i64 and ext32u_i64? The
README skips over this particular part of the naming convention and I
wonder if we should be clearer about that before we add more ops.

>          break;
> @@ -1712,6 +1714,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
>      { INDEX_op_ext8u_i64, { "r", "r" } },
>      { INDEX_op_ext16u_i64, { "r", "r" } },
>      { INDEX_op_ext32u_i64, { "r", "r" } },
> +    { INDEX_op_ext_i32_i64, { "r", "r" } },
> +    { INDEX_op_extu_i32_i64, { "r", "r" } },
>  
>      { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
>      { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4f40468..ff55499 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -2068,9 +2068,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_bswap64_i64:
>          tcg_out_bswap64(s, args[0]);
>          break;
> +    case INDEX_op_extu_i32_i64:
>      case INDEX_op_ext32u_i64:
>          tcg_out_ext32u(s, args[0], args[1]);
>          break;
> +    case INDEX_op_ext_i32_i64:
>      case INDEX_op_ext32s_i64:
>          tcg_out_ext32s(s, args[0], args[1]);
>          break;
> @@ -2205,6 +2207,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_ext16u_i64, { "r", "r" } },
>      { INDEX_op_ext32u_i64, { "r", "r" } },
>  
> +    { INDEX_op_ext_i32_i64, { "r", "r" } },
> +    { INDEX_op_extu_i32_i64, { "r", "r" } },
> +
>      { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
>      { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
>  
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index 81cb9f7..71e79cf 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -2148,9 +2148,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_ext16u_i64:
>          tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]);
>          break;
> +    case INDEX_op_ext_i32_i64:
>      case INDEX_op_ext32s_i64:
>          tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]);
>          break;
> +    case INDEX_op_extu_i32_i64:
>      case INDEX_op_ext32u_i64:
>          tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]);
>          break;
> @@ -2301,6 +2303,8 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_ext16u_i64, { "r", "rZ"} },
>      { INDEX_op_ext32s_i64, { "r", "rZ"} },
>      { INDEX_op_ext32u_i64, { "r", "rZ"} },
> +    { INDEX_op_ext_i32_i64, { "r", "rZ" } },
> +    { INDEX_op_extu_i32_i64, { "r", "rZ" } },
>  
>      { INDEX_op_bswap16_i64, { "r", "rZ" } },
>      { INDEX_op_bswap32_i64, { "r", "rZ" } },
> diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
> index ce8d546..1672220 100644
> --- a/tcg/ppc/tcg-target.c
> +++ b/tcg/ppc/tcg-target.c
> @@ -2221,12 +2221,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>      case INDEX_op_ext16s_i64:
>          c = EXTSH;
>          goto gen_ext;
> +    case INDEX_op_ext_i32_i64:
>      case INDEX_op_ext32s_i64:
>          c = EXTSW;
>          goto gen_ext;
>      gen_ext:
>          tcg_out32(s, c | RS(args[1]) | RA(args[0]));
>          break;
> +    case INDEX_op_extu_i32_i64:
> +        tcg_out_ext32u(s, args[0], args[1]);
> +        break;
>  
>      case INDEX_op_setcond_i32:
>          tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
> @@ -2503,6 +2507,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>      { INDEX_op_ext8s_i64, { "r", "r" } },
>      { INDEX_op_ext16s_i64, { "r", "r" } },
>      { INDEX_op_ext32s_i64, { "r", "r" } },
> +    { INDEX_op_ext_i32_i64, { "r", "r" } },
> +    { INDEX_op_extu_i32_i64, { "r", "r" } },

Again I'm getting confused about the naming here - I would read both of
those as widening from 32-bit to 64-bit registers without sign extension.


>      { INDEX_op_bswap16_i64, { "r", "r" } },
>      { INDEX_op_bswap32_i64, { "r", "r" } },
>      { INDEX_op_bswap64_i64, { "r", "r" } },
> diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
> index b3433ce..d4db6d3 100644
> --- a/tcg/s390/tcg-target.c
> +++ b/tcg/s390/tcg-target.c
> @@ -2106,6 +2106,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_ext16s_i64:
>          tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
>          break;
> +    case INDEX_op_ext_i32_i64:
>      case INDEX_op_ext32s_i64:
>          tgen_ext32s(s, args[0], args[1]);
>          break;
> @@ -2115,6 +2116,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_ext16u_i64:
>          tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
>          break;
> +    case INDEX_op_extu_i32_i64:
>      case INDEX_op_ext32u_i64:
>          tgen_ext32u(s, args[0], args[1]);
>          break;
> @@ -2267,6 +2269,9 @@ static const TCGTargetOpDef s390_op_defs[] = {
>      { INDEX_op_ext32s_i64, { "r", "r" } },
>      { INDEX_op_ext32u_i64, { "r", "r" } },
>  
> +    { INDEX_op_ext_i32_i64, { "r", "r" } },
> +    { INDEX_op_extu_i32_i64, { "r", "r" } },
> +
>      { INDEX_op_bswap16_i64, { "r", "r" } },
>      { INDEX_op_bswap32_i64, { "r", "r" } },
>      { INDEX_op_bswap64_i64, { "r", "r" } },
> diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
> index b23032b..fe75af0 100644
> --- a/tcg/sparc/tcg-target.c
> +++ b/tcg/sparc/tcg-target.c
> @@ -1407,9 +1407,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_divu_i64:
>          c = ARITH_UDIVX;
>          goto gen_arith;
> +    case INDEX_op_ext_i32_i64:
>      case INDEX_op_ext32s_i64:
>          tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
>          break;
> +    case INDEX_op_extu_i32_i64:
>      case INDEX_op_ext32u_i64:
>          tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
>          break;
> @@ -1531,8 +1533,10 @@ static const TCGTargetOpDef sparc_op_defs[] = {
>      { INDEX_op_neg_i64, { "R", "RJ" } },
>      { INDEX_op_not_i64, { "R", "RJ" } },
>  
> -    { INDEX_op_ext32s_i64, { "R", "r" } },
> -    { INDEX_op_ext32u_i64, { "R", "r" } },
> +    { INDEX_op_ext32s_i64, { "R", "R" } },
> +    { INDEX_op_ext32u_i64, { "R", "R" } },
> +    { INDEX_op_ext_i32_i64, { "R", "r" } },
> +    { INDEX_op_extu_i32_i64, { "R", "r" } },
>      { INDEX_op_trunc_shr_i64_i32,  { "r", "R" } },
>  
>      { INDEX_op_brcond_i64, { "RZ", "RJ" } },
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 0e79fd1..7114315 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1770,9 +1770,8 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
>          tcg_gen_mov_i32(TCGV_LOW(ret), arg);
>          tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
>      } else {
> -        /* Note: we assume the target supports move between
> -           32 and 64 bit registers.  */
> -        tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
> +        tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64,
> +                    GET_TCGV_I64(ret), GET_TCGV_I32(arg));
>      }
>  }
>  
> @@ -1782,9 +1781,8 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
>          tcg_gen_mov_i32(TCGV_LOW(ret), arg);
>          tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
>      } else {
> -        /* Note: we assume the target supports move between
> -           32 and 64 bit registers.  */
> -        tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
> +        tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64,
> +                    GET_TCGV_I64(ret), GET_TCGV_I32(arg));
>      }
>  }
>  
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index 4a34f43..f721a5a 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -138,6 +138,9 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
>  DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
>  DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
>  
> +/* size changing ops */
> +DEF(ext_i32_i64, 1, 1, 0, IMPL64)
> +DEF(extu_i32_i64, 1, 1, 0, IMPL64)
>  DEF(trunc_shr_i64_i32, 1, 1, 1,
>      IMPL(TCG_TARGET_HAS_trunc_shr_i64_i32)
>      | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
> diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
> index 83472db..bbb54d4 100644
> --- a/tcg/tci/tcg-target.c
> +++ b/tcg/tci/tcg-target.c
> @@ -210,6 +210,8 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
>  #if TCG_TARGET_HAS_ext32u_i64
>      { INDEX_op_ext32u_i64, { R, R } },
>  #endif
> +    { INDEX_op_ext_i32_i64, { R, R } },
> +    { INDEX_op_extu_i32_i64, { R, R } },
>  #if TCG_TARGET_HAS_bswap16_i64
>      { INDEX_op_bswap16_i64, { R, R } },
>  #endif
> @@ -701,6 +703,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>      case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
>      case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
>      case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
> +    case INDEX_op_ext_i32_i64:
> +    case INDEX_op_extu_i32_i64:
>  #endif /* TCG_TARGET_REG_BITS == 64 */
>      case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
>      case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
> diff --git a/tci.c b/tci.c
> index 8444948..3d6d177 100644
> --- a/tci.c
> +++ b/tci.c
> @@ -1033,18 +1033,20 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
>  #endif
>  #if TCG_TARGET_HAS_ext32s_i64
>          case INDEX_op_ext32s_i64:
> +#endif
> +        case INDEX_op_ext_i32_i64:
>              t0 = *tb_ptr++;
>              t1 = tci_read_r32s(&tb_ptr);
>              tci_write_reg64(t0, t1);
>              break;
> -#endif
>  #if TCG_TARGET_HAS_ext32u_i64
>          case INDEX_op_ext32u_i64:
> +#endif
> +        case INDEX_op_extu_i32_i64:
>              t0 = *tb_ptr++;
>              t1 = tci_read_r32(&tb_ptr);
>              tci_write_reg64(t0, t1);
>              break;
> -#endif
>  #if TCG_TARGET_HAS_bswap16_i64
>          case INDEX_op_bswap16_i64:
>              TODO();
Richard Henderson July 31, 2015, 4:11 p.m. UTC | #2
On 07/31/2015 09:01 AM, Alex Bennée wrote:
>
> Aurelien Jarno <aurelien@aurel32.net> writes:
>
>> Implement real ext_i32_i64 and extu_i32_i64 ops. They ensure that a
>> 32-bit value is always converted to a 64-bit value and not propagated
>> through the register allocator or the optimizer.
>>
>> Cc: Andrzej Zaborowski <balrogg@gmail.com>
>> Cc: Alexander Graf <agraf@suse.de>
>> Cc: Blue Swirl <blauwirbel@gmail.com>
>> Cc: Claudio Fontana <claudio.fontana@huawei.com>
>> Cc: Claudio Fontana <claudio.fontana@gmail.com>
>> Cc: Richard Henderson <rth@twiddle.net>
>> Cc: Stefan Weil <sw@weilnetz.de>
>> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
>> ---
>>   tcg/aarch64/tcg-target.c |  4 ++++
>>   tcg/i386/tcg-target.c    |  5 +++++
>>   tcg/ia64/tcg-target.c    |  4 ++++
>>   tcg/ppc/tcg-target.c     |  6 ++++++
>>   tcg/s390/tcg-target.c    |  5 +++++
>>   tcg/sparc/tcg-target.c   |  8 ++++++--
>>   tcg/tcg-op.c             | 10 ++++------
>>   tcg/tcg-opc.h            |  3 +++
>>   tcg/tci/tcg-target.c     |  4 ++++
>>   tci.c                    |  6 ++++--
>>   10 files changed, 45 insertions(+), 10 deletions(-)
>>
>> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
>> index b7ec4f5..7f7ab7e 100644
>> --- a/tcg/aarch64/tcg-target.c
>> +++ b/tcg/aarch64/tcg-target.c
>> @@ -1556,6 +1556,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>>       case INDEX_op_ext16s_i32:
>>           tcg_out_sxt(s, ext, MO_16, a0, a1);
>>           break;
>> +    case INDEX_op_ext_i32_i64:
>>       case INDEX_op_ext32s_i64:
>>           tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
>>           break;
>> @@ -1567,6 +1568,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>>       case INDEX_op_ext16u_i32:
>>           tcg_out_uxt(s, MO_16, a0, a1);
>>           break;
>> +    case INDEX_op_extu_i32_i64:
>>       case INDEX_op_ext32u_i64:
>>           tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
>
> So what is the difference between extu_i32_i64 and ext32u_i64? The
> README skips over this particular part of the naming convention and I
> wonder if we should be clearer about that before we add more ops.

The size of the input, for one.  The possibility of eliding the extension is another.

Our current plan for x86_64 and aarch64 is to canonicalize all 32-bit
quantities to be zero-extended in the register, primarily because on both
platforms this can be done for free.  Thus exts_i32_i64 and extrl_i64_i32 will
require implementation, but extu_i32_i64 will be replaced by a move.

Similarly, mips64 would keep values sign-extended (as *required* by the 
standard for all 32-bit operations), and thus exts_i32_i64 would be replaced by 
a move.

Other targets will probably make extrl_i64_i32 be the move, since the 32-bit 
ops don't re-extend for free.
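
To make the input-size point concrete, here are illustrative-only C equivalents
of the ops under discussion (a sketch, not code from this series):

    #include <stdint.h>

    /* Sketch: plain-C semantics of the new i32 -> i64 ops. */
    static inline int64_t  c_ext_i32_i64(int32_t x)   { return (int64_t)x; }  /* sign-extend */
    static inline uint64_t c_extu_i32_i64(uint32_t x) { return (uint64_t)x; } /* zero-extend */

    /* ext32s_i64 and ext32u_i64, by contrast, take an i64 input and
       (re-)extend its low 32 bits. */

On a host that canonicalizes 32-bit values as zero-extended, extu_i32_i64 can
then be lowered to a plain register move, while ext_i32_i64 still needs a real
sign-extension instruction.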


r~

Patch

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index b7ec4f5..7f7ab7e 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1556,6 +1556,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16s_i32:
         tcg_out_sxt(s, ext, MO_16, a0, a1);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
         break;
@@ -1567,6 +1568,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i32:
         tcg_out_uxt(s, MO_16, a0, a1);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
         break;
@@ -1712,6 +1714,8 @@  static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_ext8u_i64, { "r", "r" } },
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
 
     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
     { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4f40468..ff55499 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -2068,9 +2068,11 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_bswap64_i64:
         tcg_out_bswap64(s, args[0]);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_ext32u(s, args[0], args[1]);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_ext32s(s, args[0], args[1]);
         break;
@@ -2205,6 +2207,9 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
+
     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
     { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
 
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 81cb9f7..71e79cf 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -2148,9 +2148,11 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i64:
         tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]);
         break;
@@ -2301,6 +2303,8 @@  static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "rZ"} },
     { INDEX_op_ext32s_i64, { "r", "rZ"} },
     { INDEX_op_ext32u_i64, { "r", "rZ"} },
+    { INDEX_op_ext_i32_i64, { "r", "rZ" } },
+    { INDEX_op_extu_i32_i64, { "r", "rZ" } },
 
     { INDEX_op_bswap16_i64, { "r", "rZ" } },
     { INDEX_op_bswap32_i64, { "r", "rZ" } },
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index ce8d546..1672220 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -2221,12 +2221,16 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_ext16s_i64:
         c = EXTSH;
         goto gen_ext;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         c = EXTSW;
         goto gen_ext;
     gen_ext:
         tcg_out32(s, c | RS(args[1]) | RA(args[0]));
         break;
+    case INDEX_op_extu_i32_i64:
+        tcg_out_ext32u(s, args[0], args[1]);
+        break;
 
     case INDEX_op_setcond_i32:
         tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
@@ -2503,6 +2507,8 @@  static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_ext8s_i64, { "r", "r" } },
     { INDEX_op_ext16s_i64, { "r", "r" } },
     { INDEX_op_ext32s_i64, { "r", "r" } },
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
     { INDEX_op_bswap16_i64, { "r", "r" } },
     { INDEX_op_bswap32_i64, { "r", "r" } },
     { INDEX_op_bswap64_i64, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index b3433ce..d4db6d3 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -2106,6 +2106,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16s_i64:
         tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tgen_ext32s(s, args[0], args[1]);
         break;
@@ -2115,6 +2116,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i64:
         tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tgen_ext32u(s, args[0], args[1]);
         break;
@@ -2267,6 +2269,9 @@  static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_ext32s_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
+
     { INDEX_op_bswap16_i64, { "r", "r" } },
     { INDEX_op_bswap32_i64, { "r", "r" } },
     { INDEX_op_bswap64_i64, { "r", "r" } },
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index b23032b..fe75af0 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1407,9 +1407,11 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_divu_i64:
         c = ARITH_UDIVX;
         goto gen_arith;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
         break;
@@ -1531,8 +1533,10 @@  static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_neg_i64, { "R", "RJ" } },
     { INDEX_op_not_i64, { "R", "RJ" } },
 
-    { INDEX_op_ext32s_i64, { "R", "r" } },
-    { INDEX_op_ext32u_i64, { "R", "r" } },
+    { INDEX_op_ext32s_i64, { "R", "R" } },
+    { INDEX_op_ext32u_i64, { "R", "R" } },
+    { INDEX_op_ext_i32_i64, { "R", "r" } },
+    { INDEX_op_extu_i32_i64, { "R", "r" } },
     { INDEX_op_trunc_shr_i64_i32,  { "r", "R" } },
 
     { INDEX_op_brcond_i64, { "RZ", "RJ" } },
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 0e79fd1..7114315 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1770,9 +1770,8 @@  void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
     } else {
-        /* Note: we assume the target supports move between
-           32 and 64 bit registers.  */
-        tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+        tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64,
+                    GET_TCGV_I64(ret), GET_TCGV_I32(arg));
     }
 }
 
@@ -1782,9 +1781,8 @@  void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
     } else {
-        /* Note: we assume the target supports move between
-           32 and 64 bit registers.  */
-        tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+        tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64,
+                    GET_TCGV_I64(ret), GET_TCGV_I32(arg));
     }
 }
 
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 4a34f43..f721a5a 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -138,6 +138,9 @@  DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
 
+/* size changing ops */
+DEF(ext_i32_i64, 1, 1, 0, IMPL64)
+DEF(extu_i32_i64, 1, 1, 0, IMPL64)
 DEF(trunc_shr_i64_i32, 1, 1, 1,
     IMPL(TCG_TARGET_HAS_trunc_shr_i64_i32)
     | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 83472db..bbb54d4 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -210,6 +210,8 @@  static const TCGTargetOpDef tcg_target_op_defs[] = {
 #if TCG_TARGET_HAS_ext32u_i64
     { INDEX_op_ext32u_i64, { R, R } },
 #endif
+    { INDEX_op_ext_i32_i64, { R, R } },
+    { INDEX_op_extu_i32_i64, { R, R } },
 #if TCG_TARGET_HAS_bswap16_i64
     { INDEX_op_bswap16_i64, { R, R } },
 #endif
@@ -701,6 +703,8 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
     case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
     case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+    case INDEX_op_ext_i32_i64:
+    case INDEX_op_extu_i32_i64:
 #endif /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
     case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
diff --git a/tci.c b/tci.c
index 8444948..3d6d177 100644
--- a/tci.c
+++ b/tci.c
@@ -1033,18 +1033,20 @@  uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
 #endif
 #if TCG_TARGET_HAS_ext32s_i64
         case INDEX_op_ext32s_i64:
+#endif
+        case INDEX_op_ext_i32_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_r32s(&tb_ptr);
             tci_write_reg64(t0, t1);
             break;
-#endif
 #if TCG_TARGET_HAS_ext32u_i64
         case INDEX_op_ext32u_i64:
+#endif
+        case INDEX_op_extu_i32_i64:
             t0 = *tb_ptr++;
             t1 = tci_read_r32(&tb_ptr);
             tci_write_reg64(t0, t1);
             break;
-#endif
 #if TCG_TARGET_HAS_bswap16_i64
         case INDEX_op_bswap16_i64:
             TODO();