Message ID | 1532702862-22751-6-git-send-email-arbab@linux.ibm.com |
---|---|
State | Superseded |
Headers | show |
Series | npu2: Add support for relaxed-ordering mode | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | master/apply_patch Successfully applied |
On Friday, 27 July 2018 9:47:41 AM AEST Reza Arbab wrote: > Make the code that initializes these registers more descriptive by > using macros instead of open coded literals. No functional change. But open coded literals are fun and have been sanctioned until at least 2020 so we still have heaps of time. Although at that point they will just explode and stop working :-) > Signed-off-by: Reza Arbab <arbab@linux.ibm.com> > --- > hw/npu2.c | 31 +++++++++++++------------------ > include/npu2-regs.h | 2 ++ > 2 files changed, 15 insertions(+), 18 deletions(-) > > diff --git a/hw/npu2.c b/hw/npu2.c > index acd56c1..9748536 100644 > --- a/hw/npu2.c > +++ b/hw/npu2.c > @@ -857,7 +857,8 @@ static void npu2_mcd_init(struct npu2 *p) > > static void npu2_hw_init(struct npu2 *p) > { > - uint64_t val; > + uint64_t reg, val; > + int s, b; Not sure I can deal with s & b instead of i & j in for loops, but I guess this stands for stack and brick. Reviewed-by: Alistair Popple <alistair@popple.id.au> > npu2_ioda_reset(&p->phb_nvlink, false); > > @@ -916,6 +917,17 @@ static void npu2_hw_init(struct npu2 *p) > NPU2DBG(p, "Using large memory map + MCD disabled\n"); > p->gpu_map_type = GPU_MEM_4T_DOWN; > } > + > + /* Static initialization of every relaxed-ordering cfg[2] register */ > + val = NPU2_RELAXED_ORDERING_CMD_CL_RD_NC_F0 | > + NPU2_RELAXED_ORDERING_SOURCE4_RDENA; > + > + for (s = NPU2_STACK_STCK_0; s <= NPU2_STACK_STCK_2; s++) { > + for (b = NPU2_BLOCK_SM_0; b <= NPU2_BLOCK_SM_3; b++) { > + reg = NPU2_REG_OFFSET(s, b, NPU2_RELAXED_ORDERING_CFG2); > + npu2_write(p, reg, val); > + } > + } > } > > static int64_t npu2_map_pe_dma_window_real(struct phb *phb, > @@ -1416,23 +1428,6 @@ static void npu2_probe_phb(struct dt_node *dn) > xscom_write_mask(gcid, 0x5011510, val, val); > xscom_write_mask(gcid, 0x5011530, val, val); > > - /* > - * Enable relaxed ordering for peer-to-peer reads > - */ > - val = PPC_BIT(5) | PPC_BIT(29); > - xscom_write_mask(gcid, 0x501100c, val, val); > - 
xscom_write_mask(gcid, 0x501103c, val, val); > - xscom_write_mask(gcid, 0x501106c, val, val); > - xscom_write_mask(gcid, 0x501109c, val, val); > - xscom_write_mask(gcid, 0x501120c, val, val); > - xscom_write_mask(gcid, 0x501123c, val, val); > - xscom_write_mask(gcid, 0x501126c, val, val); > - xscom_write_mask(gcid, 0x501129c, val, val); > - xscom_write_mask(gcid, 0x501140c, val, val); > - xscom_write_mask(gcid, 0x501143c, val, val); > - xscom_write_mask(gcid, 0x501146c, val, val); > - xscom_write_mask(gcid, 0x501149c, val, val); > - > val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11); > xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11)); > xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11)); > diff --git a/include/npu2-regs.h b/include/npu2-regs.h > index 4a17ac8..d9db988 100644 > --- a/include/npu2-regs.h > +++ b/include/npu2-regs.h > @@ -155,6 +155,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_RELAXED_ORDERING_CFG0 0x050 > #define NPU2_RELAXED_ORDERING_CFG1 0x058 > #define NPU2_RELAXED_ORDERING_CFG2 0x060 > +#define NPU2_RELAXED_ORDERING_CMD_CL_RD_NC_F0 PPC_BIT(5) > +#define NPU2_RELAXED_ORDERING_SOURCE4_RDENA PPC_BIT(29) > #define NPU2_NTL0_BAR 0x068 > #define NPU2_NTL1_BAR 0x070 > #define NPU2_NTL_BAR_ENABLE PPC_BIT(0) >
diff --git a/hw/npu2.c b/hw/npu2.c index acd56c1..9748536 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -857,7 +857,8 @@ static void npu2_mcd_init(struct npu2 *p) static void npu2_hw_init(struct npu2 *p) { - uint64_t val; + uint64_t reg, val; + int s, b; npu2_ioda_reset(&p->phb_nvlink, false); @@ -916,6 +917,17 @@ static void npu2_hw_init(struct npu2 *p) NPU2DBG(p, "Using large memory map + MCD disabled\n"); p->gpu_map_type = GPU_MEM_4T_DOWN; } + + /* Static initialization of every relaxed-ordering cfg[2] register */ + val = NPU2_RELAXED_ORDERING_CMD_CL_RD_NC_F0 | + NPU2_RELAXED_ORDERING_SOURCE4_RDENA; + + for (s = NPU2_STACK_STCK_0; s <= NPU2_STACK_STCK_2; s++) { + for (b = NPU2_BLOCK_SM_0; b <= NPU2_BLOCK_SM_3; b++) { + reg = NPU2_REG_OFFSET(s, b, NPU2_RELAXED_ORDERING_CFG2); + npu2_write(p, reg, val); + } + } } static int64_t npu2_map_pe_dma_window_real(struct phb *phb, @@ -1416,23 +1428,6 @@ static void npu2_probe_phb(struct dt_node *dn) xscom_write_mask(gcid, 0x5011510, val, val); xscom_write_mask(gcid, 0x5011530, val, val); - /* - * Enable relaxed ordering for peer-to-peer reads - */ - val = PPC_BIT(5) | PPC_BIT(29); - xscom_write_mask(gcid, 0x501100c, val, val); - xscom_write_mask(gcid, 0x501103c, val, val); - xscom_write_mask(gcid, 0x501106c, val, val); - xscom_write_mask(gcid, 0x501109c, val, val); - xscom_write_mask(gcid, 0x501120c, val, val); - xscom_write_mask(gcid, 0x501123c, val, val); - xscom_write_mask(gcid, 0x501126c, val, val); - xscom_write_mask(gcid, 0x501129c, val, val); - xscom_write_mask(gcid, 0x501140c, val, val); - xscom_write_mask(gcid, 0x501143c, val, val); - xscom_write_mask(gcid, 0x501146c, val, val); - xscom_write_mask(gcid, 0x501149c, val, val); - val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11); xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11)); xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11)); diff --git a/include/npu2-regs.h b/include/npu2-regs.h index 4a17ac8..d9db988 100644 --- a/include/npu2-regs.h +++ 
b/include/npu2-regs.h @@ -155,6 +155,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_RELAXED_ORDERING_CFG0 0x050 #define NPU2_RELAXED_ORDERING_CFG1 0x058 #define NPU2_RELAXED_ORDERING_CFG2 0x060 +#define NPU2_RELAXED_ORDERING_CMD_CL_RD_NC_F0 PPC_BIT(5) +#define NPU2_RELAXED_ORDERING_SOURCE4_RDENA PPC_BIT(29) #define NPU2_NTL0_BAR 0x068 #define NPU2_NTL1_BAR 0x070 #define NPU2_NTL_BAR_ENABLE PPC_BIT(0)
Make the code that initializes these registers more descriptive by using macros instead of open-coded literals. No functional change. Signed-off-by: Reza Arbab <arbab@linux.ibm.com> --- hw/npu2.c | 31 +++++++++++++------------------ include/npu2-regs.h | 2 ++ 2 files changed, 15 insertions(+), 18 deletions(-)