@@ -318,7 +318,7 @@ DEF_HELPER_FLAGS_3(vbpermq, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vpmsumb, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vpmsumh, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vpmsumw, TCG_CALL_NO_RWG, void, avr, avr, avr)
-DEF_HELPER_FLAGS_3(vpmsumd, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VPMSUMD, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_2(vextublx, TCG_CALL_NO_RWG, tl, tl, avr)
DEF_HELPER_FLAGS_2(vextuhlx, TCG_CALL_NO_RWG, tl, tl, avr)
DEF_HELPER_FLAGS_2(vextuwlx, TCG_CALL_NO_RWG, tl, tl, avr)
@@ -426,6 +426,10 @@ DSCLIQ 111111 ..... ..... ...... 001000010 . @Z22_tap_sh_rc
DSCRI 111011 ..... ..... ...... 001100010 . @Z22_ta_sh_rc
DSCRIQ 111111 ..... ..... ...... 001100010 . @Z22_tap_sh_rc

+## Vector Exclusive-OR-based Instructions
+
+VPMSUMD 000100 ..... ..... ..... 10011001000 @VX
+
## Vector Integer Instructions

VCMPEQUB 000100 ..... ..... ..... . 0000000110 @VC
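For reference, the added pattern matches VX-form encodings with primary opcode 4 (bits 0:5) and extended opcode 1224 (bits 21:31). A minimal standalone check of the same encoding in plain C (is_vpmsumd is an illustrative name, not part of this patch):

  #include <stdbool.h>
  #include <stdint.h>

  /* Mirrors the decodetree pattern: primary opcode 4, XO 1224 */
  static bool is_vpmsumd(uint32_t insn)
  {
      return (insn >> 26) == 0x04 &&   /* bits 0:5   = 000100 */
             (insn & 0x7ff) == 0x4c8;  /* bits 21:31 = 10011001000 */
  }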
@@ -1484,52 +1484,24 @@ PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

-void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
-
-#ifdef CONFIG_INT128
    int i, j;
-    __uint128_t prod[2];
+    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};

-    VECTOR_FOR_INORDER_I(i, u64) {
-        prod[i] = 0;
-        for (j = 0; j < 64; j++) {
-            if (a->u64[i] & (1ull << j)) {
-                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
+    for (j = 0; j < 64; j++) {
+        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
+            if (a->VsrD(i) & (1ull << j)) {
+                tmp = int128_make64(b->VsrD(i));
+                tmp = int128_lshift(tmp, j);
+                prod[i] = int128_xor(prod[i], tmp);
            }
        }
    }

-    r->u128 = prod[0] ^ prod[1];
-
-#else
-    int i, j;
-    ppc_avr_t prod[2];
-
-    VECTOR_FOR_INORDER_I(i, u64) {
-        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
-        for (j = 0; j < 64; j++) {
-            if (a->u64[i] & (1ull << j)) {
-                ppc_avr_t bshift;
-                if (j == 0) {
-                    bshift.VsrD(0) = 0;
-                    bshift.VsrD(1) = b->u64[i];
-                } else {
-                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
-                    bshift.VsrD(1) = b->u64[i] << j;
-                }
-                prod[i].VsrD(1) ^= bshift.VsrD(1);
-                prod[i].VsrD(0) ^= bshift.VsrD(0);
-            }
-        }
-    }
-
-    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
-    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
-#endif
+    r->s128 = int128_xor(prod[0], prod[1]);
}

-
#if HOST_BIG_ENDIAN
#define PKBIG 1
#else
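To make the new helper's intent clear: VPMSUMD carry-less-multiplies each doubleword of VRA by the corresponding doubleword of VRB and XORs the two 128-bit polynomial products. A standalone sketch of one lane's product, using an illustrative U128 struct in place of QEMU's Int128:

  #include <stdint.h>

  typedef struct { uint64_t hi, lo; } U128;  /* illustrative stand-in */

  /* Carry-less (GF(2) polynomial) multiply of two 64-bit values. */
  static U128 clmul64(uint64_t a, uint64_t b)
  {
      U128 prod = { 0, 0 };
      int j;

      for (j = 0; j < 64; j++) {
          if (a & (1ULL << j)) {
              prod.lo ^= b << j;
              /* bits of b that cross the 64-bit boundary */
              prod.hi ^= j ? b >> (64 - j) : 0;
          }
      }
      return prod;
  }

The rewritten helper does the same per lane with int128_lshift()/int128_xor(), then folds the two lane products together into r->s128.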
@@ -2717,7 +2717,6 @@ GEN_VXFORM_TRANS(vgbbd, 6, 20);
GEN_VXFORM(vpmsumb, 4, 16)
GEN_VXFORM(vpmsumh, 4, 17)
GEN_VXFORM(vpmsumw, 4, 18)
-GEN_VXFORM(vpmsumd, 4, 19)

#define GEN_BCD(op) \
static void gen_##op(DisasContext *ctx) \
@@ -3101,6 +3100,8 @@ static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
    return true;
}

+TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD)
+
static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
                         void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
{
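For context, TRANS_FLAGS2 emits the trans_VPMSUMD() callback that the decodetree-generated decoder calls, gating the instruction on the ALTIVEC_207 flag. It expands to roughly the following (simplified sketch; see the real macro in target/ppc/translate.c for the exact definition):

  static bool trans_VPMSUMD(DisasContext *ctx, arg_VPMSUMD *a)
  {
      /* reject the instruction if the CPU lacks ISA 2.07 AltiVec */
      REQUIRE_INSNS_FLAGS2(ctx, ALTIVEC_207);
      return do_vx_helper(ctx, a, gen_helper_VPMSUMD);
  }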
@@ -237,7 +237,6 @@ GEN_VXFORM_207(vgbbd, 6, 20),
GEN_VXFORM_207(vpmsumb, 4, 16),
GEN_VXFORM_207(vpmsumh, 4, 17),
GEN_VXFORM_207(vpmsumw, 4, 18),
-GEN_VXFORM_207(vpmsumd, 4, 19),
GEN_VXFORM_207(vsbox, 4, 23),
Also drop VECTOR_FOR_INORDER_I usage since there is no need to access
the elements in any particular order, and move the instruction to
decodetree.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 target/ppc/helper.h                 |  2 +-
 target/ppc/insn32.decode            |  4 +++
 target/ppc/int_helper.c             | 46 ++++++-----------------------
 target/ppc/translate/vmx-impl.c.inc |  3 +-
 target/ppc/translate/vmx-ops.c.inc  |  1 -
 5 files changed, 16 insertions(+), 40 deletions(-)
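As a quick illustration of the qemu/int128.h helpers the new code relies on (this snippet builds only inside the QEMU tree; on CONFIG_INT128 hosts they compile down to native __int128 operations):

  #include "qemu/osdep.h"
  #include "qemu/int128.h"

  static void int128_shift_demo(void)
  {
      Int128 x = int128_make64(1ULL << 63);  /* only bit 63 set */

      x = int128_lshift(x, 1);               /* carries into the high doubleword */
      g_assert(int128_gethi(x) == 1);
      g_assert(int128_getlo(x) == 0);
  }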