Message ID | 1275678883-7082-17-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On Fri, Jun 04, 2010 at 12:14:24PM -0700, Richard Henderson wrote: > Make better use of the LOAD HALFWORD IMMEDIATE, LOAD IMMEDIATE, > and INSERT IMMEDIATE instruction groups. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/s390/tcg-target.c | 129 +++++++++++++++++++++++++++++++++++++++++++------ > 1 files changed, 113 insertions(+), 16 deletions(-) > > diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c > index d99bb5c..71e017a 100644 > --- a/tcg/s390/tcg-target.c > +++ b/tcg/s390/tcg-target.c > @@ -52,12 +52,23 @@ typedef enum S390Opcode { > RIL_BRASL = 0xc005, > RIL_BRCL = 0xc004, > RIL_LARL = 0xc000, > + RIL_IIHF = 0xc008, > + RIL_IILF = 0xc009, > + RIL_LGFI = 0xc001, > + RIL_LLIHF = 0xc00e, > + RIL_LLILF = 0xc00f, > > RI_AGHI = 0xa70b, > RI_AHI = 0xa70a, > RI_BRC = 0xa704, > + RI_IIHH = 0xa500, > + RI_IIHL = 0xa501, > RI_IILH = 0xa502, > + RI_IILL = 0xa503, > RI_LGHI = 0xa709, > + RI_LLIHH = 0xa50c, > + RI_LLIHL = 0xa50d, > + RI_LLILH = 0xa50e, > RI_LLILL = 0xa50f, > > RRE_AGR = 0xb908, > @@ -382,24 +393,110 @@ static inline void tcg_out_mov(TCGContext *s, int ret, int arg) > } > > /* load a register with an immediate value */ > -static inline void tcg_out_movi(TCGContext *s, TCGType type, > - int ret, tcg_target_long arg) > +static void tcg_out_movi(TCGContext *s, TCGType type, > + TCGReg ret, tcg_target_long sval) > { > - if (arg >= -0x8000 && arg < 0x8000) { /* signed immediate load */ > - tcg_out_insn(s, RI, LGHI, ret, arg); > - } else if (!(arg & 0xffffffffffff0000UL)) { > - tcg_out_insn(s, RI, LLILL, ret, arg); > - } else if (!(arg & 0xffffffff00000000UL) || type == TCG_TYPE_I32) { > - tcg_out_insn(s, RI, LLILL, ret, arg); > - tcg_out_insn(s, RI, IILH, ret, arg >> 16); > + static const S390Opcode lli_insns[4] = { > + RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH > + }; > + > + tcg_target_ulong uval = sval; > + int i; > + > + if (type == TCG_TYPE_I32) { > + uval = (uint32_t)sval; > + sval = (int32_t)sval; > + } > + > + /* Try all 32-bit 
insns that can load it in one go. */ > + if (sval >= -0x8000 && sval < 0x8000) { > + tcg_out_insn(s, RI, LGHI, ret, sval); > + return; > + } > + > + for (i = 0; i < 4; i++) { > + tcg_target_long mask = 0xffffull << i*16; > + if ((uval & mask) != 0 && (uval & ~mask) == 0) { Wouldn't it be simpler to use (uval & mask) == uval ? > + tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); > + return; > + } > + } > + > + /* Try all 48-bit insns that can load it in one go. */ > + if (facilities & FACILITY_EXT_IMM) { > + if (sval == (int32_t)sval) { > + tcg_out_insn(s, RIL, LGFI, ret, sval); > + return; > + } > + if (uval <= 0xffffffff) { > + tcg_out_insn(s, RIL, LLILF, ret, uval); > + return; > + } > + if ((uval & 0xffffffff) == 0) { > + tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32); > + return; > + } > + } > + > + /* Try for PC-relative address load. */ > + if ((sval & 1) == 0) { > + intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1; > + if (off == (int32_t)off) { > + tcg_out_insn(s, RIL, LARL, ret, off); > + return; > + } > + } Is this part used in practice? There was such a trick on the ARM backend, but it was actually never used. > + > + /* If extended immediates are not present, then we may have to issue > + several instructions to load the low 32 bits. */ > + if (!(facilities & FACILITY_EXT_IMM)) { > + /* A 32-bit unsigned value can be loaded in 2 insns. And given > + that the lli_insns loop above did not succeed, we know that > + both insns are required. */ > + if (uval <= 0xffffffff) { > + tcg_out_insn(s, RI, LLILL, ret, uval); > + tcg_out_insn(s, RI, IILH, ret, uval >> 16); > + return; > + } > + > + /* If all high bits are set, the value can be loaded in 2 or 3 insns. > + We first want to make sure that all the high bits get set. With > + luck the low 16-bits can be considered negative to perform that for > + free, otherwise we load an explicit -1. 
*/ > + if (sval >> 32 == -1) { > + if (uval & 0x8000) { > + tcg_out_insn(s, RI, LGHI, ret, uval); > + } else { > + tcg_out_insn(s, RI, LGHI, ret, -1); > + tcg_out_insn(s, RI, IILL, ret, uval); > + } > + tcg_out_insn(s, RI, IILH, ret, uval >> 16); > + return; > + } > + } > + > + /* If we get here, both the high and low parts have non-zero bits. */ > + > + /* Recurse to load the lower 32-bits. */ > + tcg_out_movi(s, TCG_TYPE_I32, ret, sval); > + > + /* Insert data into the high 32-bits. */ > + uval >>= 32; > + if (facilities & FACILITY_EXT_IMM) { > + if (uval < 0x10000) { > + tcg_out_insn(s, RI, IIHL, ret, uval); > + } else if ((uval & 0xffff) == 0) { > + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); > + } else { > + tcg_out_insn(s, RIL, IIHF, ret, uval); > + } > } else { > - /* branch over constant and store its address in R13 */ > - tcg_out_insn(s, RIL, BRASL, TCG_TMP0, (6 + 8) >> 1); > - /* 64-bit constant */ > - tcg_out32(s, arg >> 32); > - tcg_out32(s, arg); > - /* load constant to ret */ > - tcg_out_insn(s, RXY, LG, ret, TCG_TMP0, 0, 0); > + if (uval & 0xffff) { > + tcg_out_insn(s, RI, IIHL, ret, uval); > + } > + if (uval & 0xffff0000) { > + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); > + } > } > } > > -- > 1.7.0.1 > > >
On 06/12/2010 05:04 AM, Aurelien Jarno wrote:
>> + for (i = 0; i < 4; i++) {
>> + tcg_target_long mask = 0xffffull << i*16;
>> + if ((uval & mask) != 0 && (uval & ~mask) == 0) {
>
> Wouldn't it be simpler to use (uval & mask) == uval ?

Doh.

>> + /* Try for PC-relative address load. */
>> + if ((sval & 1) == 0) {
>> + intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1;
>> + if (off == (int32_t)off) {
>> + tcg_out_insn(s, RIL, LARL, ret, off);
>> + return;
>> + }
>> + }
>
> Is this part used in practice? There was such a trick on the ARM
> backend, but it was actually never used.

Yes. The difference here is we have a +- 4GB displacement. This is primarily used when the extended-immediate facility is not present; we can generate all even 32-bit constants from LARL, given the placement of the code_gen_buffer.

r~
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index d99bb5c..71e017a 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -52,12 +52,23 @@ typedef enum S390Opcode { RIL_BRASL = 0xc005, RIL_BRCL = 0xc004, RIL_LARL = 0xc000, + RIL_IIHF = 0xc008, + RIL_IILF = 0xc009, + RIL_LGFI = 0xc001, + RIL_LLIHF = 0xc00e, + RIL_LLILF = 0xc00f, RI_AGHI = 0xa70b, RI_AHI = 0xa70a, RI_BRC = 0xa704, + RI_IIHH = 0xa500, + RI_IIHL = 0xa501, RI_IILH = 0xa502, + RI_IILL = 0xa503, RI_LGHI = 0xa709, + RI_LLIHH = 0xa50c, + RI_LLIHL = 0xa50d, + RI_LLILH = 0xa50e, RI_LLILL = 0xa50f, RRE_AGR = 0xb908, @@ -382,24 +393,110 @@ static inline void tcg_out_mov(TCGContext *s, int ret, int arg) } /* load a register with an immediate value */ -static inline void tcg_out_movi(TCGContext *s, TCGType type, - int ret, tcg_target_long arg) +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long sval) { - if (arg >= -0x8000 && arg < 0x8000) { /* signed immediate load */ - tcg_out_insn(s, RI, LGHI, ret, arg); - } else if (!(arg & 0xffffffffffff0000UL)) { - tcg_out_insn(s, RI, LLILL, ret, arg); - } else if (!(arg & 0xffffffff00000000UL) || type == TCG_TYPE_I32) { - tcg_out_insn(s, RI, LLILL, ret, arg); - tcg_out_insn(s, RI, IILH, ret, arg >> 16); + static const S390Opcode lli_insns[4] = { + RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH + }; + + tcg_target_ulong uval = sval; + int i; + + if (type == TCG_TYPE_I32) { + uval = (uint32_t)sval; + sval = (int32_t)sval; + } + + /* Try all 32-bit insns that can load it in one go. */ + if (sval >= -0x8000 && sval < 0x8000) { + tcg_out_insn(s, RI, LGHI, ret, sval); + return; + } + + for (i = 0; i < 4; i++) { + tcg_target_long mask = 0xffffull << i*16; + if ((uval & mask) != 0 && (uval & ~mask) == 0) { + tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); + return; + } + } + + /* Try all 48-bit insns that can load it in one go. 
*/ + if (facilities & FACILITY_EXT_IMM) { + if (sval == (int32_t)sval) { + tcg_out_insn(s, RIL, LGFI, ret, sval); + return; + } + if (uval <= 0xffffffff) { + tcg_out_insn(s, RIL, LLILF, ret, uval); + return; + } + if ((uval & 0xffffffff) == 0) { + tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32); + return; + } + } + + /* Try for PC-relative address load. */ + if ((sval & 1) == 0) { + intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1; + if (off == (int32_t)off) { + tcg_out_insn(s, RIL, LARL, ret, off); + return; + } + } + + /* If extended immediates are not present, then we may have to issue + several instructions to load the low 32 bits. */ + if (!(facilities & FACILITY_EXT_IMM)) { + /* A 32-bit unsigned value can be loaded in 2 insns. And given + that the lli_insns loop above did not succeed, we know that + both insns are required. */ + if (uval <= 0xffffffff) { + tcg_out_insn(s, RI, LLILL, ret, uval); + tcg_out_insn(s, RI, IILH, ret, uval >> 16); + return; + } + + /* If all high bits are set, the value can be loaded in 2 or 3 insns. + We first want to make sure that all the high bits get set. With + luck the low 16-bits can be considered negative to perform that for + free, otherwise we load an explicit -1. */ + if (sval >> 32 == -1) { + if (uval & 0x8000) { + tcg_out_insn(s, RI, LGHI, ret, uval); + } else { + tcg_out_insn(s, RI, LGHI, ret, -1); + tcg_out_insn(s, RI, IILL, ret, uval); + } + tcg_out_insn(s, RI, IILH, ret, uval >> 16); + return; + } + } + + /* If we get here, both the high and low parts have non-zero bits. */ + + /* Recurse to load the lower 32-bits. */ + tcg_out_movi(s, TCG_TYPE_I32, ret, sval); + + /* Insert data into the high 32-bits. 
*/ + uval >>= 32; + if (facilities & FACILITY_EXT_IMM) { + if (uval < 0x10000) { + tcg_out_insn(s, RI, IIHL, ret, uval); + } else if ((uval & 0xffff) == 0) { + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); + } else { + tcg_out_insn(s, RIL, IIHF, ret, uval); + } } else { - /* branch over constant and store its address in R13 */ - tcg_out_insn(s, RIL, BRASL, TCG_TMP0, (6 + 8) >> 1); - /* 64-bit constant */ - tcg_out32(s, arg >> 32); - tcg_out32(s, arg); - /* load constant to ret */ - tcg_out_insn(s, RXY, LG, ret, TCG_TMP0, 0, 0); + if (uval & 0xffff) { + tcg_out_insn(s, RI, IIHL, ret, uval); + } + if (uval & 0xffff0000) { + tcg_out_insn(s, RI, IIHH, ret, uval >> 16); + } } }
Make better use of the LOAD HALFWORD IMMEDIATE, LOAD IMMEDIATE, and INSERT IMMEDIATE instruction groups.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c | 129 +++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 113 insertions(+), 16 deletions(-)