diff mbox

[047/126] target-s390: Convert LOAD, STORE MULTIPLE

Message ID 1347224784-19472-48-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Sept. 9, 2012, 9:05 p.m. UTC
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-s390x/insn-data.def |  14 ++++
 target-s390x/translate.c   | 191 ++++++++++++++++++++++++---------------------
 2 files changed, 116 insertions(+), 89 deletions(-)

Comments

Alexander Graf Sept. 18, 2012, 8:49 p.m. UTC | #1
On 09/09/2012 11:05 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>   target-s390x/insn-data.def |  14 ++++
>   target-s390x/translate.c   | 191 ++++++++++++++++++++++++---------------------
>   2 files changed, 116 insertions(+), 89 deletions(-)
>
> diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def
> index 103c1d2..b1f0448 100644
> --- a/target-s390x/insn-data.def
> +++ b/target-s390x/insn-data.def
> @@ -269,6 +269,13 @@
>       C(0xb900, LPGR,    RRE,   Z,   0, r2, r1, 0, abs, abs64)
>       C(0xb910, LPGFR,   RRE,   Z,   0, r2_32s, r1, 0, abs, abs64)
>   
> +/* LOAD MULTIPLE */
> +    C(0x9800, LM,      RS_a,  Z,   0, a2, 0, 0, lm32, 0)
> +    C(0xeb98, LMY,     RSY_a, LD,  0, a2, 0, 0, lm32, 0)
> +    C(0xeb04, LMG,     RSY_a, Z,   0, a2, 0, 0, lm64, 0)
> +/* LOAD MULTIPLE HIGH */
> +    C(0xeb96, LMH,     RSY_a, Z,   0, a2, 0, 0, lmh, 0)
> +
>   /* MOVE LONG */
>       C(0x0e00, MVCL,    RR_a,  Z,   0, 0, 0, 0, mvcl, 0)
>   
> @@ -364,6 +371,13 @@
>   /* STORE HALFWORD RELATIVE LONG */
>       C(0xc407, STHRL,   RIL_b, GIE, r1_o, ri2, 0, 0, st16, 0)
>   
> +/* STORE MULTIPLE */
> +    D(0x9000, STM,     RS_a,  Z,   0, a2, 0, 0, stm, 0, 4)
> +    D(0xeb90, STMY,    RSY_a, LD,  0, a2, 0, 0, stm, 0, 4)
> +    D(0xeb24, STMG,    RSY_a, Z,   0, a2, 0, 0, stm, 0, 8)
> +/* STORE MULTIPLE HIGH */
> +    C(0xeb26, STMH,    RSY_a, Z,   0, a2, 0, 0, stmh, 0)
> +
>   /* SUBTRACT */
>       C(0x1b00, SR,      RR_a,  Z,   r1, r2, new, r1_32, sub, subs32)
>       C(0xb9f9, SRK,     RRF_a, DO,  r2, r3, new, r1_32, sub, subs32)
> diff --git a/target-s390x/translate.c b/target-s390x/translate.c
> index e78e4bb..4c5540d 100644
> --- a/target-s390x/translate.c
> +++ b/target-s390x/translate.c
> @@ -271,6 +271,11 @@ static inline void store_reg32_i64(int reg, TCGv_i64 v)
>       tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
>   }
>   
> +static inline void store_reg32h_i64(int reg, TCGv_i64 v)
> +{
> +    tcg_gen_deposit_i64(regs[reg], regs[reg], v, 32, 32);
> +}
> +
>   static inline void store_reg16(int reg, TCGv_i32 v)
>   {
>       /* 16 bit register writes keep the upper bytes */
> @@ -1320,65 +1325,12 @@ static void disas_e5(DisasContext* s, uint64_t insn)
>   
>   static void disas_eb(DisasContext *s, int op, int r1, int r3, int b2, int d2)
>   {
> -    TCGv_i64 tmp, tmp2, tmp3, tmp4;
> +    TCGv_i64 tmp, tmp2;
>       TCGv_i32 tmp32_1, tmp32_2;
> -    int i, stm_len;
>   
>       LOG_DISAS("disas_eb: op 0x%x r1 %d r3 %d b2 %d d2 0x%x\n",
>                 op, r1, r3, b2, d2);
>       switch (op) {
> -    case 0x4:  /* LMG      R1,R3,D2(B2)     [RSE] */
> -    case 0x24: /* STMG     R1,R3,D2(B2)     [RSE] */
> -        stm_len = 8;
> -        goto do_mh;
> -    case 0x26: /* STMH     R1,R3,D2(B2)     [RSE] */
> -    case 0x96: /* LMH      R1,R3,D2(B2)     [RSE] */
> -        stm_len = 4;
> -do_mh:
> -        /* Apparently, unrolling lmg/stmg of any size gains performance -
> -           even for very long ones... */
> -        tmp = get_address(s, 0, b2, d2);
> -        tmp3 = tcg_const_i64(stm_len);
> -        tmp4 = tcg_const_i64(op == 0x26 ? 32 : 4);
> -        for (i = r1;; i = (i + 1) % 16) {
> -            switch (op) {
> -            case 0x4:
> -                tcg_gen_qemu_ld64(regs[i], tmp, get_mem_index(s));
> -                break;
> -            case 0x96:
> -                tmp2 = tcg_temp_new_i64();
> -#if HOST_LONG_BITS == 32
> -                tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s));
> -                tcg_gen_trunc_i64_i32(TCGV_HIGH(regs[i]), tmp2);
> -#else
> -                tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s));
> -                tcg_gen_shl_i64(tmp2, tmp2, tmp4);
> -                tcg_gen_ext32u_i64(regs[i], regs[i]);
> -                tcg_gen_or_i64(regs[i], regs[i], tmp2);
> -#endif
> -                tcg_temp_free_i64(tmp2);
> -                break;
> -            case 0x24:
> -                tcg_gen_qemu_st64(regs[i], tmp, get_mem_index(s));
> -                break;
> -            case 0x26:
> -                tmp2 = tcg_temp_new_i64();
> -                tcg_gen_shr_i64(tmp2, regs[i], tmp4);
> -                tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s));
> -                tcg_temp_free_i64(tmp2);
> -                break;
> -            default:
> -                tcg_abort();
> -            }
> -            if (i == r3) {
> -                break;
> -            }
> -            tcg_gen_add_i64(tmp, tmp, tmp3);
> -        }
> -        tcg_temp_free_i64(tmp);
> -        tcg_temp_free_i64(tmp3);
> -        tcg_temp_free_i64(tmp4);
> -        break;
>       case 0x2c: /* STCMH R1,M3,D2(B2) [RSY] */
>           tmp = get_address(s, 0, b2, d2);
>           tmp32_1 = tcg_const_i32(r1);
> @@ -2270,44 +2222,17 @@ static void disas_b9(DisasContext *s, int op, int r1, int r2)
>   
>   static void disas_s390_insn(DisasContext *s)
>   {
> -    TCGv_i64 tmp, tmp2, tmp3, tmp4;
> +    TCGv_i64 tmp, tmp2;
>       TCGv_i32 tmp32_1, tmp32_2;
>       unsigned char opc;
>       uint64_t insn;
> -    int op, r1, r2, r3, d1, d2, x2, b1, b2, i, i2, r1b;
> +    int op, r1, r2, r3, d1, d2, x2, b1, b2, i2, r1b;
>       TCGv_i32 vl;
>   
>       opc = cpu_ldub_code(cpu_single_env, s->pc);
>       LOG_DISAS("opc 0x%x\n", opc);
>   
>       switch (opc) {
> -    case 0x98: /* LM     R1,R3,D2(B2)     [RS] */
> -    case 0x90: /* STM    R1,R3,D2(B2)     [RS] */
> -        insn = ld_code4(s->pc);
> -        decode_rs(s, insn, &r1, &r3, &b2, &d2);
> -
> -        tmp = get_address(s, 0, b2, d2);
> -        tmp2 = tcg_temp_new_i64();
> -        tmp3 = tcg_const_i64(4);
> -        tmp4 = tcg_const_i64(0xffffffff00000000ULL);
> -        for (i = r1;; i = (i + 1) % 16) {
> -            if (opc == 0x98) {
> -                tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s));
> -                tcg_gen_and_i64(regs[i], regs[i], tmp4);
> -                tcg_gen_or_i64(regs[i], regs[i], tmp2);
> -            } else {
> -                tcg_gen_qemu_st32(regs[i], tmp, get_mem_index(s));
> -            }
> -            if (i == r3) {
> -                break;
> -            }
> -            tcg_gen_add_i64(tmp, tmp, tmp3);
> -        }
> -        tcg_temp_free_i64(tmp);
> -        tcg_temp_free_i64(tmp2);
> -        tcg_temp_free_i64(tmp3);
> -        tcg_temp_free_i64(tmp4);
> -        break;
>       case 0x92: /* MVI    D1(B1),I2        [SI] */
>           insn = ld_code4(s->pc);
>           tmp = decode_si(s, insn, &i2, &b1, &d1);
> @@ -2604,19 +2529,17 @@ static void disas_s390_insn(DisasContext *s)
>           d1 = (insn >> 16) & 0xfff;
>           b2 = (insn >> 12) & 0xf;
>           d2 = insn & 0xfff;
> -        tmp = load_reg(r1);
>           /* XXX key in r3 */
> -        tmp2 = get_address(s, 0, b1, d1);
> -        tmp3 = get_address(s, 0, b2, d2);
> +        tmp = get_address(s, 0, b1, d1);
> +        tmp2 = get_address(s, 0, b2, d2);
>           if (opc == 0xda) {
> -            gen_helper_mvcp(cc_op, cpu_env, tmp, tmp2, tmp3);
> +            gen_helper_mvcp(cc_op, cpu_env, regs[r1], tmp, tmp2);
>           } else {
> -            gen_helper_mvcs(cc_op, cpu_env, tmp, tmp2, tmp3);
> +            gen_helper_mvcs(cc_op, cpu_env, regs[r1], tmp, tmp2);
>           }
>           set_cc_static(s);
>           tcg_temp_free_i64(tmp);
>           tcg_temp_free_i64(tmp2);
> -        tcg_temp_free_i64(tmp3);
>           break;
>   #endif
>       case 0xe3:
> @@ -3346,6 +3269,57 @@ static ExitStatus op_lpsw(DisasContext *s, DisasOps *o)
>   }
>   #endif
>   
> +static ExitStatus op_lm32(DisasContext *s, DisasOps *o)
> +{
> +    int r1 = get_field(s->fields, r1);
> +    int r3 = get_field(s->fields, r3);
> +    TCGv_i64 t = tcg_temp_new_i64();
> +
> +    while (1) {
> +        tcg_gen_qemu_ld32u(t, o->in2, get_mem_index(s));
> +        store_reg32_i64(r1, t);
> +        if (r1 == r3) {
> +            break;
> +        }
> +        tcg_gen_addi_i64(o->in2, o->in2, 4);
> +        r1 = (r1 + 1) & 15;
> +    }
> +    return NO_EXIT;
> +}
> +
> +static ExitStatus op_lmh(DisasContext *s, DisasOps *o)
> +{
> +    int r1 = get_field(s->fields, r1);
> +    int r3 = get_field(s->fields, r3);
> +    TCGv_i64 t = tcg_temp_new_i64();
> +
> +    while (1) {
> +        tcg_gen_qemu_ld32u(t, o->in2, get_mem_index(s));
> +        store_reg32h_i64(r1, t);
> +        if (r1 == r3) {
> +            break;
> +        }
> +        tcg_gen_addi_i64(o->in2, o->in2, 4);

I don't have a good answer here, but how do we guarantee that we're not 
overrunning the tcg inst buffer?


Alex
Richard Henderson Sept. 19, 2012, 12:15 a.m. UTC | #2
On 09/18/2012 01:49 PM, Alexander Graf wrote:
> I don't have a good answer here, but how do we guarantee that we're
> not overrunning the tcg inst buffer?

Dunno.  Not a new problem though, since the old code does the same thing.


r~
diff mbox

Patch

diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def
index 103c1d2..b1f0448 100644
--- a/target-s390x/insn-data.def
+++ b/target-s390x/insn-data.def
@@ -269,6 +269,13 @@ 
     C(0xb900, LPGR,    RRE,   Z,   0, r2, r1, 0, abs, abs64)
     C(0xb910, LPGFR,   RRE,   Z,   0, r2_32s, r1, 0, abs, abs64)
 
+/* LOAD MULTIPLE */
+    C(0x9800, LM,      RS_a,  Z,   0, a2, 0, 0, lm32, 0)
+    C(0xeb98, LMY,     RSY_a, LD,  0, a2, 0, 0, lm32, 0)
+    C(0xeb04, LMG,     RSY_a, Z,   0, a2, 0, 0, lm64, 0)
+/* LOAD MULTIPLE HIGH */
+    C(0xeb96, LMH,     RSY_a, Z,   0, a2, 0, 0, lmh, 0)
+
 /* MOVE LONG */
     C(0x0e00, MVCL,    RR_a,  Z,   0, 0, 0, 0, mvcl, 0)
 
@@ -364,6 +371,13 @@ 
 /* STORE HALFWORD RELATIVE LONG */
     C(0xc407, STHRL,   RIL_b, GIE, r1_o, ri2, 0, 0, st16, 0)
 
+/* STORE MULTIPLE */
+    D(0x9000, STM,     RS_a,  Z,   0, a2, 0, 0, stm, 0, 4)
+    D(0xeb90, STMY,    RSY_a, LD,  0, a2, 0, 0, stm, 0, 4)
+    D(0xeb24, STMG,    RSY_a, Z,   0, a2, 0, 0, stm, 0, 8)
+/* STORE MULTIPLE HIGH */
+    C(0xeb26, STMH,    RSY_a, Z,   0, a2, 0, 0, stmh, 0)
+
 /* SUBTRACT */
     C(0x1b00, SR,      RR_a,  Z,   r1, r2, new, r1_32, sub, subs32)
     C(0xb9f9, SRK,     RRF_a, DO,  r2, r3, new, r1_32, sub, subs32)
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index e78e4bb..4c5540d 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -271,6 +271,11 @@  static inline void store_reg32_i64(int reg, TCGv_i64 v)
     tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
 }
 
+/* Write the low 32 bits of V into bits 32..63 of register REG;
+   the low word of the register is kept as it was.  */
+static inline void store_reg32h_i64(int reg, TCGv_i64 v)
+{
+    TCGv_i64 dst = regs[reg];
+
+    tcg_gen_deposit_i64(dst, dst, v, 32, 32);
+}
+
 static inline void store_reg16(int reg, TCGv_i32 v)
 {
     /* 16 bit register writes keep the upper bytes */
@@ -1320,65 +1325,12 @@  static void disas_e5(DisasContext* s, uint64_t insn)
 
 static void disas_eb(DisasContext *s, int op, int r1, int r3, int b2, int d2)
 {
-    TCGv_i64 tmp, tmp2, tmp3, tmp4;
+    TCGv_i64 tmp, tmp2;
     TCGv_i32 tmp32_1, tmp32_2;
-    int i, stm_len;
 
     LOG_DISAS("disas_eb: op 0x%x r1 %d r3 %d b2 %d d2 0x%x\n",
               op, r1, r3, b2, d2);
     switch (op) {
-    case 0x4:  /* LMG      R1,R3,D2(B2)     [RSE] */
-    case 0x24: /* STMG     R1,R3,D2(B2)     [RSE] */
-        stm_len = 8;
-        goto do_mh;
-    case 0x26: /* STMH     R1,R3,D2(B2)     [RSE] */
-    case 0x96: /* LMH      R1,R3,D2(B2)     [RSE] */
-        stm_len = 4;
-do_mh:
-        /* Apparently, unrolling lmg/stmg of any size gains performance -
-           even for very long ones... */
-        tmp = get_address(s, 0, b2, d2);
-        tmp3 = tcg_const_i64(stm_len);
-        tmp4 = tcg_const_i64(op == 0x26 ? 32 : 4);
-        for (i = r1;; i = (i + 1) % 16) {
-            switch (op) {
-            case 0x4:
-                tcg_gen_qemu_ld64(regs[i], tmp, get_mem_index(s));
-                break;
-            case 0x96:
-                tmp2 = tcg_temp_new_i64();
-#if HOST_LONG_BITS == 32
-                tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s));
-                tcg_gen_trunc_i64_i32(TCGV_HIGH(regs[i]), tmp2);
-#else
-                tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s));
-                tcg_gen_shl_i64(tmp2, tmp2, tmp4);
-                tcg_gen_ext32u_i64(regs[i], regs[i]);
-                tcg_gen_or_i64(regs[i], regs[i], tmp2);
-#endif
-                tcg_temp_free_i64(tmp2);
-                break;
-            case 0x24:
-                tcg_gen_qemu_st64(regs[i], tmp, get_mem_index(s));
-                break;
-            case 0x26:
-                tmp2 = tcg_temp_new_i64();
-                tcg_gen_shr_i64(tmp2, regs[i], tmp4);
-                tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s));
-                tcg_temp_free_i64(tmp2);
-                break;
-            default:
-                tcg_abort();
-            }
-            if (i == r3) {
-                break;
-            }
-            tcg_gen_add_i64(tmp, tmp, tmp3);
-        }
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp3);
-        tcg_temp_free_i64(tmp4);
-        break;
     case 0x2c: /* STCMH R1,M3,D2(B2) [RSY] */
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -2270,44 +2222,17 @@  static void disas_b9(DisasContext *s, int op, int r1, int r2)
 
 static void disas_s390_insn(DisasContext *s)
 {
-    TCGv_i64 tmp, tmp2, tmp3, tmp4;
+    TCGv_i64 tmp, tmp2;
     TCGv_i32 tmp32_1, tmp32_2;
     unsigned char opc;
     uint64_t insn;
-    int op, r1, r2, r3, d1, d2, x2, b1, b2, i, i2, r1b;
+    int op, r1, r2, r3, d1, d2, x2, b1, b2, i2, r1b;
     TCGv_i32 vl;
 
     opc = cpu_ldub_code(cpu_single_env, s->pc);
     LOG_DISAS("opc 0x%x\n", opc);
 
     switch (opc) {
-    case 0x98: /* LM     R1,R3,D2(B2)     [RS] */
-    case 0x90: /* STM    R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
-        decode_rs(s, insn, &r1, &r3, &b2, &d2);
-
-        tmp = get_address(s, 0, b2, d2);
-        tmp2 = tcg_temp_new_i64();
-        tmp3 = tcg_const_i64(4);
-        tmp4 = tcg_const_i64(0xffffffff00000000ULL);
-        for (i = r1;; i = (i + 1) % 16) {
-            if (opc == 0x98) {
-                tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s));
-                tcg_gen_and_i64(regs[i], regs[i], tmp4);
-                tcg_gen_or_i64(regs[i], regs[i], tmp2);
-            } else {
-                tcg_gen_qemu_st32(regs[i], tmp, get_mem_index(s));
-            }
-            if (i == r3) {
-                break;
-            }
-            tcg_gen_add_i64(tmp, tmp, tmp3);
-        }
-        tcg_temp_free_i64(tmp);
-        tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
-        tcg_temp_free_i64(tmp4);
-        break;
     case 0x92: /* MVI    D1(B1),I2        [SI] */
         insn = ld_code4(s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
@@ -2604,19 +2529,17 @@  static void disas_s390_insn(DisasContext *s)
         d1 = (insn >> 16) & 0xfff;
         b2 = (insn >> 12) & 0xf;
         d2 = insn & 0xfff;
-        tmp = load_reg(r1);
         /* XXX key in r3 */
-        tmp2 = get_address(s, 0, b1, d1);
-        tmp3 = get_address(s, 0, b2, d2);
+        tmp = get_address(s, 0, b1, d1);
+        tmp2 = get_address(s, 0, b2, d2);
         if (opc == 0xda) {
-            gen_helper_mvcp(cc_op, cpu_env, tmp, tmp2, tmp3);
+            gen_helper_mvcp(cc_op, cpu_env, regs[r1], tmp, tmp2);
         } else {
-            gen_helper_mvcs(cc_op, cpu_env, tmp, tmp2, tmp3);
+            gen_helper_mvcs(cc_op, cpu_env, regs[r1], tmp, tmp2);
         }
         set_cc_static(s);
         tcg_temp_free_i64(tmp);
         tcg_temp_free_i64(tmp2);
-        tcg_temp_free_i64(tmp3);
         break;
 #endif
     case 0xe3:
@@ -3346,6 +3269,57 @@  static ExitStatus op_lpsw(DisasContext *s, DisasOps *o)
 }
 #endif
 
+/*
+ * 32-bit LOAD MULTIPLE (used by the LM and LMY table entries).
+ *
+ * Emits one 32-bit load per register, starting with field r1 and ending
+ * with field r3 inclusive; the register number wraps from 15 back to 0.
+ * Each loaded word replaces the low 32 bits of its register and leaves
+ * the upper half untouched.
+ *
+ * o->in2 serves as the running memory address and is advanced in place
+ * by 4 between registers.  Always returns NO_EXIT.
+ */
+static ExitStatus op_lm32(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s->fields, r1);
+    int r3 = get_field(s->fields, r3);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    while (1) {
+        tcg_gen_qemu_ld32u(t, o->in2, get_mem_index(s));
+        store_reg32_i64(r1, t);
+        if (r1 == r3) {
+            break;
+        }
+        tcg_gen_addi_i64(o->in2, o->in2, 4);
+        r1 = (r1 + 1) & 15;
+    }
+
+    /* Release the scratch temporary so it is not leaked.  */
+    tcg_temp_free_i64(t);
+    return NO_EXIT;
+}
+
+/*
+ * LOAD MULTIPLE HIGH.
+ *
+ * Emits one 32-bit load per register, starting with field r1 and ending
+ * with field r3 inclusive; the register number wraps from 15 back to 0.
+ * Each loaded word is written to bits 32..63 of its register via
+ * store_reg32h_i64(), so the low half is left untouched.
+ *
+ * o->in2 serves as the running memory address and is advanced in place
+ * by 4 between registers.  Always returns NO_EXIT.
+ */
+static ExitStatus op_lmh(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s->fields, r1);
+    int r3 = get_field(s->fields, r3);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    while (1) {
+        tcg_gen_qemu_ld32u(t, o->in2, get_mem_index(s));
+        store_reg32h_i64(r1, t);
+        if (r1 == r3) {
+            break;
+        }
+        tcg_gen_addi_i64(o->in2, o->in2, 4);
+        r1 = (r1 + 1) & 15;
+    }
+
+    /* Release the scratch temporary so it is not leaked.  */
+    tcg_temp_free_i64(t);
+    return NO_EXIT;
+}
+
+/*
+ * 64-bit LOAD MULTIPLE (the LMG table entry).
+ *
+ * Loads a full 64-bit value straight into each register from field r1
+ * through field r3 inclusive, wrapping from register 15 to register 0.
+ * o->in2 is the running address and is bumped by 8 in place between
+ * registers.  Always returns NO_EXIT.
+ */
+static ExitStatus op_lm64(DisasContext *s, DisasOps *o)
+{
+    int reg = get_field(s->fields, r1);
+    const int last = get_field(s->fields, r3);
+
+    for (;; reg = (reg + 1) & 15) {
+        tcg_gen_qemu_ld64(regs[reg], o->in2, get_mem_index(s));
+        if (reg == last) {
+            break;
+        }
+        /* More registers to go: step the address to the next slot.  */
+        tcg_gen_addi_i64(o->in2, o->in2, 8);
+    }
+    return NO_EXIT;
+}
+
 static ExitStatus op_mov2(DisasContext *s, DisasOps *o)
 {
     o->out = o->in2;
@@ -3512,6 +3486,45 @@  static ExitStatus op_st64(DisasContext *s, DisasOps *o)
     return NO_EXIT;
 }
 
+/*
+ * STORE MULTIPLE, shared by the STM, STMY and STMG table entries.
+ *
+ * The per-register store width in bytes comes from s->insn->data
+ * (the table entries supply 4 or 8).  A width of 8 emits a 64-bit
+ * store of the whole register; any other width emits a 32-bit store.
+ * Registers run from field r1 through field r3 inclusive, wrapping
+ * from 15 to 0, and o->in2 is advanced in place by the width between
+ * registers.  Always returns NO_EXIT.
+ */
+static ExitStatus op_stm(DisasContext *s, DisasOps *o)
+{
+    int reg = get_field(s->fields, r1);
+    const int last = get_field(s->fields, r3);
+    const int width = s->insn->data;
+
+    for (;; reg = (reg + 1) & 15) {
+        if (width != 8) {
+            tcg_gen_qemu_st32(regs[reg], o->in2, get_mem_index(s));
+        } else {
+            tcg_gen_qemu_st64(regs[reg], o->in2, get_mem_index(s));
+        }
+        if (reg == last) {
+            break;
+        }
+        /* More registers to go: step the address to the next slot.  */
+        tcg_gen_addi_i64(o->in2, o->in2, width);
+    }
+    return NO_EXIT;
+}
+
+/*
+ * STORE MULTIPLE HIGH.
+ *
+ * For each register from field r1 through field r3 inclusive (wrapping
+ * from 15 to 0), stores bits 32..63 of the register as a 32-bit word.
+ * o->in2 serves as the running memory address and is advanced in place
+ * by 4 between registers.  Always returns NO_EXIT.
+ */
+static ExitStatus op_stmh(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s->fields, r1);
+    int r3 = get_field(s->fields, r3);
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    while (1) {
+        /* Bring the high word down into the low 32 bits, which is the
+           part the 32-bit store writes.  A left shift here would leave
+           those bits zero.  */
+        tcg_gen_shri_i64(t, regs[r1], 32);
+        tcg_gen_qemu_st32(t, o->in2, get_mem_index(s));
+        if (r1 == r3) {
+            break;
+        }
+        tcg_gen_addi_i64(o->in2, o->in2, 4);
+        r1 = (r1 + 1) & 15;
+    }
+
+    /* Release the scratch temporary so it is not leaked.  */
+    tcg_temp_free_i64(t);
+    return NO_EXIT;
+}
+
 static ExitStatus op_sub(DisasContext *s, DisasOps *o)
 {
     tcg_gen_sub_i64(o->out, o->in1, o->in2);