[v2,01/17] RISC-V: add vfp field in CPURISCVState

Message ID	1568183141-67641-2-git-send-email-zhiwei_liu@c-sky.com
State	New
Headers	show Return-Path: <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org> From: liuzhiwei <zhiwei_liu@c-sky.com> To: Alistair.Francis@wdc.com, palmer@sifive.com, sagark@eecs.berkeley.edu, kbastian@mail.uni-paderborn.de, riku.voipio@iki.fi, laurent@vivier.eu, wenmeng_zhang@c-sky.com Date: Wed, 11 Sep 2019 14:25:25 +0800 Message-Id: <1568183141-67641-2-git-send-email-zhiwei_liu@c-sky.com> In-Reply-To: <1568183141-67641-1-git-send-email-zhiwei_liu@c-sky.com> References: <1568183141-67641-1-git-send-email-zhiwei_liu@c-sky.com> Subject: [Qemu-devel] [PATCH v2 01/17] RISC-V: add vfp field in CPURISCVState Precedence: list Cc: qemu-riscv@nongnu.org, qemu-devel@nongnu.org, wxy194768@alibaba-inc.com, LIU Zhiwei <zhiwei_liu@c-sky.com> Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>
Series	RISC-V: support vector extension \| expand [v2,00/17] RISC-V: support vector extension [v2,01/17] RISC-V: add vfp field in CPURISCVState [v2,02/17] RISC-V: turn on vector extension from command line by cfg.ext_v Property [v2,03/17] RISC-V: support vector extension csr [v2,04/17] RISC-V: add vector extension configure instruction [v2,05/17] RISC-V: add vector extension load and store instructions [v2,06/17] RISC-V: add vector extension fault-only-first implementation [v2,07/17] RISC-V: add vector extension atomic instructions [v2,08/17] RISC-V: add vector extension integer instructions part1, add/sub/adc/sbc [v2,09/17] RISC-V: add vector extension integer instructions part2, bit/shift [v2,10/17] RISC-V: add vector extension integer instructions part3, cmp/min/max [v2,11/17] RISC-V: add vector extension integer instructions part4, mul/div/merge [v2,12/17] RISC-V: add vector extension fixed point instructions [v2,13/17] RISC-V: add vector extension float instruction part1, add/sub/mul/div [v2,14/17] RISC-V: add vector extension float instructions part2, sqrt/cmp/cvt/others [v2,15/17] RISC-V: add vector extension reduction instructions [v2,16/17] RISC-V: add vector extension mask instructions [v2,17/17] RISC-V: add vector extension premutation instructions

LIU Zhiwei Sept. 11, 2019, 6:25 a.m. UTC

From: LIU Zhiwei <zhiwei_liu@c-sky.com>

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
 target/riscv/cpu.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

Chih-Min Chao Sept. 11, 2019, 2:51 p.m. UTC | #1

On Wed, Sep 11, 2019 at 2:35 PM liuzhiwei <zhiwei_liu@c-sky.com> wrote:

> From: LIU Zhiwei <zhiwei_liu@c-sky.com>
>
> Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
> ---
>  target/riscv/cpu.h | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 0adb307..c992b1d 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -93,9 +93,37 @@ typedef struct CPURISCVState CPURISCVState;
>
>  #include "pmp.h"
>
> +#define VLEN 128
> +#define VUNIT(x) (VLEN / x)
> +
>  struct CPURISCVState {
>      target_ulong gpr[32];
>      uint64_t fpr[32]; /* assume both F and D extensions */
> +
> +    /* vector coprocessor state.  */
> +    struct {
> +        union VECTOR {
> +            float64  f64[VUNIT(64)];
> +            float32  f32[VUNIT(32)];
> +            float16  f16[VUNIT(16)];
> +            uint64_t u64[VUNIT(64)];
> +            int64_t  s64[VUNIT(64)];
> +            uint32_t u32[VUNIT(32)];
> +            int32_t  s32[VUNIT(32)];
> +            uint16_t u16[VUNIT(16)];
> +            int16_t  s16[VUNIT(16)];
> +            uint8_t  u8[VUNIT(8)];
> +            int8_t   s8[VUNIT(8)];
> +        } vreg[32];
> +        target_ulong vxrm;
> +        target_ulong vxsat;
> +        target_ulong vl;
> +        target_ulong vstart;
> +        target_ulong vtype;
> +        float_status fp_status;
> +    } vfp;
> +
> +    bool         foflag;
>      target_ulong pc;
>      target_ulong load_res;
>      target_ulong load_val;
> --
> 2.7.4
>
>
Could  the VLEN be configurable in cpu initialization but not fixed in
compilation phase ?
Take the integer element as example  and the difference should be the
stride of vfp.vreg[x] isn't continuous

    struct {
        union VECTOR {
            uint64_t *u64;
            uint16_t *u16;
            uint8_t  *u8;
        } vreg[32];
    } vfp;

   initialization

    int vlen = 256;  //parameter from cpu command line option
    int elem = vlen / 8;
    int size = elem * 32;

    uint8_t *mem = malloc(size)
    for (int idx = 0; idx < 32; ++idx) {
        vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
        vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
        vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
   }

  chihmin

Richard Henderson Sept. 11, 2019, 10:32 p.m. UTC | #2

On 9/11/19 2:25 AM, liuzhiwei wrote:
>      uint64_t fpr[32]; /* assume both F and D extensions */
> +
> +    /* vector coprocessor state.  */
> +    struct {
> +        union VECTOR {
> +            float64  f64[VUNIT(64)];
> +            float32  f32[VUNIT(32)];
> +            float16  f16[VUNIT(16)];
> +            uint64_t u64[VUNIT(64)];
> +            int64_t  s64[VUNIT(64)];
> +            uint32_t u32[VUNIT(32)];
> +            int32_t  s32[VUNIT(32)];
> +            uint16_t u16[VUNIT(16)];
> +            int16_t  s16[VUNIT(16)];
> +            uint8_t  u8[VUNIT(8)];
> +            int8_t   s8[VUNIT(8)];
> +        } vreg[32];
> +        target_ulong vxrm;
> +        target_ulong vxsat;
> +        target_ulong vl;
> +        target_ulong vstart;
> +        target_ulong vtype;
> +        float_status fp_status;
> +    } vfp;

Is there a good reason why you're putting all of these into a sub-structure?
And more, a sub-structure whose name, vfp, looks like it is copied from ARM?

Why are the vxrm, vxsat, vl, vstart, vtype fields sized target_ulong?  I would
think that most could be uint32_t.  Although I suppose frm is also target_ulong
and need not be...

Why are you adding a new fp_status field?  The new vector floating point
instructions set the exact same fflags exception bits as normal fp instructions.

r~

Richard Henderson Sept. 11, 2019, 10:39 p.m. UTC | #3

On 9/11/19 10:51 AM, Chih-Min Chao wrote:
> Could  the VLEN be configurable in cpu initialization but not fixed in
> compilation phase ?
> Take the integer element as example  and the difference should be the
> stride of vfp.vreg[x] isn't continuous

Do you really want an unbounded amount of vector register storage?

>     uint8_t *mem = malloc(size)
>     for (int idx = 0; idx < 32; ++idx) {
>         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
>    }

This isn't adjusting the stride of the elements.  And in any case this would
have to be re-adjusted for every vsetvl.


r~

Chih-Min Chao Sept. 12, 2019, 2:53 p.m. UTC | #4

On Thu, Sep 12, 2019 at 6:39 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 9/11/19 10:51 AM, Chih-Min Chao wrote:
> > Could  the VLEN be configurable in cpu initialization but not fixed in
> > compilation phase ?
> > Take the integer element as example  and the difference should be the
> > stride of vfp.vreg[x] isn't continuous
>
> Do you really want an unbounded amount of vector register storage?


 Hi Richard,

VLEN is implementation-defined parameter and the only limitation on spec is
that it must be power of 2.
What I prefer is the value could be adjustable in runtime.

>


> >     uint8_t *mem = malloc(size)
> >     for (int idx = 0; idx < 32; ++idx) {
> >         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
> >         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
> >         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
> >    }
>
> This isn't adjusting the stride of the elements.  And in any case this
> would
> have to be re-adjusted for every vsetvl.
>
>  Not sure about the relation with vsetvl. Could you provide an example ?

Chih-Min

>
> r~
>

Richard Henderson Sept. 12, 2019, 3:06 p.m. UTC | #5

On 9/12/19 10:53 AM, Chih-Min Chao wrote:
> 
> 
> On Thu, Sep 12, 2019 at 6:39 AM Richard Henderson <richard.henderson@linaro.org
> <mailto:richard.henderson@linaro.org>> wrote:
> 
>     On 9/11/19 10:51 AM, Chih-Min Chao wrote:
>     > Could  the VLEN be configurable in cpu initialization but not fixed in
>     > compilation phase ?
>     > Take the integer element as example  and the difference should be the
>     > stride of vfp.vreg[x] isn't continuous
> 
>     Do you really want an unbounded amount of vector register storage?
> 
> 
>  Hi Richard,
> 
> VLEN is implementation-defined parameter and the only limitation on spec is
> that it must be power of 2.
> What I prefer is the value could be adjustable in runtime.

Ok, fine, I suppose.  I'll let a risc-v maintainer opine on whether there
should be some sanity check on the bounds of VLEN.  If you really do have an
unbounded vlen, you'll need to consider carefully how you want to manage migration.

>     >     uint8_t *mem = malloc(size)
>     >     for (int idx = 0; idx < 32; ++idx) {
>     >         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
>     >         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
>     >         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
>     >    }
> 
>     This isn't adjusting the stride of the elements.  And in any case this would
>     have to be re-adjusted for every vsetvl.
> 
>  Not sure about the relation with vsetvl. Could you provide an example ?

Well, I think it's merely a matter of there's no point having so many different
pointers into the block of memory that provides the backing storage.  I've
asserted elsewhere in the thread that we shouldn't have an array of 32
"registers" anyway.


r~

LIU Zhiwei Sept. 17, 2019, 8:09 a.m. UTC | #6

On 2019/9/11 下午10:51, Chih-Min Chao wrote:
>
>
> On Wed, Sep 11, 2019 at 2:35 PM liuzhiwei <zhiwei_liu@c-sky.com 
> <mailto:zhiwei_liu@c-sky.com>> wrote:
>
>     From: LIU Zhiwei <zhiwei_liu@c-sky.com <mailto:zhiwei_liu@c-sky.com>>
>
>     Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com
>     <mailto:zhiwei_liu@c-sky.com>>
>     ---
>      target/riscv/cpu.h | 28 ++++++++++++++++++++++++++++
>      1 file changed, 28 insertions(+)
>
>     diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>     index 0adb307..c992b1d 100644
>     --- a/target/riscv/cpu.h
>     +++ b/target/riscv/cpu.h
>     @@ -93,9 +93,37 @@ typedef struct CPURISCVState CPURISCVState;
>
>      #include "pmp.h"
>
>     +#define VLEN 128
>     +#define VUNIT(x) (VLEN / x)
>     +
>      struct CPURISCVState {
>          target_ulong gpr[32];
>          uint64_t fpr[32]; /* assume both F and D extensions */
>     +
>     +    /* vector coprocessor state.  */
>     +    struct {
>     +        union VECTOR {
>     +            float64  f64[VUNIT(64)];
>     +            float32  f32[VUNIT(32)];
>     +            float16  f16[VUNIT(16)];
>     +            uint64_t u64[VUNIT(64)];
>     +            int64_t  s64[VUNIT(64)];
>     +            uint32_t u32[VUNIT(32)];
>     +            int32_t  s32[VUNIT(32)];
>     +            uint16_t u16[VUNIT(16)];
>     +            int16_t  s16[VUNIT(16)];
>     +            uint8_t  u8[VUNIT(8)];
>     +            int8_t   s8[VUNIT(8)];
>     +        } vreg[32];
>     +        target_ulong vxrm;
>     +        target_ulong vxsat;
>     +        target_ulong vl;
>     +        target_ulong vstart;
>     +        target_ulong vtype;
>     +        float_status fp_status;
>     +    } vfp;
>     +
>     +    bool         foflag;
>          target_ulong pc;
>          target_ulong load_res;
>          target_ulong load_val;
>     -- 
>     2.7.4
>
>
> Could  the VLEN be configurable in cpu initialization but not fixed in 
> compilation phase ?

Yes,  it's important that VLEN is configurable to support different 
types of cpu.

> Take the integer element as example  and the difference should be the 
> stride of vfp.vreg[x] isn't continuous
>
>     struct {
>         union VECTOR {
>             uint64_t *u64;
>             uint16_t *u16;
>             uint8_t  *u8;
>         } vreg[32];
>     } vfp;
>
>    initialization
>     int vlen = 256;  //parameter from cpu command line option
>     int elem = vlen / 8;
>     int size = elem * 32;
>
>     uint8_t *mem = malloc(size)
>     for (int idx = 0; idx < 32; ++idx) {
>         vfp.vreg[idx].u64 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u32 = (void *)&mem[idx * elem];
>         vfp.vreg[idx].u16 = (void *)&mem[idx * elem];
>    }
>
>   chihmin

It's a good idea. I will accept it.

Thanks for review.

Zhiwei

[v2,01/17] RISC-V: add vfp field in CPURISCVState

Commit Message

Comments

Patch