diff mbox

[1/5] nvlink: Use SCOMs instead of MMIO in reset procedure

Message ID 1452137792-24062-1-git-send-email-ruscur@russell.cc
State Accepted
Headers show

Commit Message

Russell Currey Jan. 7, 2016, 3:36 a.m. UTC
EEH in the kernel shuts down MMIO BARs as part of freeze recovery.
This can cause the reset procedure, which you probably want to work
during a freeze, to fail because it can't do operations with MMIO.
Refactor the MMIO operations to use SCOM instead.

Signed-off-by: Russell Currey <ruscur@russell.cc>
---
 hw/npu-hw-procedures.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

Comments

Alistair Popple Jan. 14, 2016, 3:18 a.m. UTC | #1
Thanks Russell.

Acked-By: Alistair Popple <alistair@popple.id.au>

On Thu, 7 Jan 2016 14:36:28 Russell Currey wrote:
> EEH in the kernel shuts down MMIO BARs as part of freeze recovery.
> This can cause the reset procedure, which you probably want to work
> during a freeze, to fail because it can't do operations with MMIO.
> Refactor the MMIO operations to use SCOM instead.
> 
> Signed-off-by: Russell Currey <ruscur@russell.cc>
> ---
>  hw/npu-hw-procedures.c | 16 +++++++---------
>  1 file changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/npu-hw-procedures.c b/hw/npu-hw-procedures.c
> index 118ed6d..24f3b2c 100644
> --- a/hw/npu-hw-procedures.c
> +++ b/hw/npu-hw-procedures.c
> @@ -132,13 +132,12 @@ DEFINE_PROCEDURE(nop);
>   * incorporates AT reset. */
>  static uint32_t reset_npu_dl(struct npu_dev *npu_dev)
>  {
> -	void *ntl_base = (void *) npu_dev->bar.base;
>  	uint64_t val;
>  
>  	/* Assert NPU reset */
> -	val = in_be64(ntl_base + NTL_CONTROL);
> +	xscom_read(npu_dev->npu->chip_id, npu_dev->xscom + NX_NTL_CONTROL, &val);
>  	val |= NTL_CONTROL_RESET;
> -	out_be64(ntl_base + NTL_CONTROL, val);
> +	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_NTL_CONTROL, val);
>  
>  	/* Put the Nvidia logic in reset */
>  	dl_write(npu_dev, NDL_CONTROL, 0xe8000000);
> @@ -148,14 +147,13 @@ static uint32_t reset_npu_dl(struct npu_dev *npu_dev)
>  
>  	/* Release NPU from reset */
>  	val &= ~NTL_CONTROL_RESET;
> -	out_be64(ntl_base + NTL_CONTROL, val);
> +	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_NTL_CONTROL, val);
>  
>  	/* Setup up TL credits */
> -	out_be64(ntl_base + TL_CMD_CR, PPC_BIT(0));
> -	out_be64(ntl_base + TL_CMD_D_CR, PPC_BIT(0));
> -	out_be64(ntl_base + TL_RSP_CR, PPC_BIT(15));
> -	out_be64(ntl_base + TL_RSP_D_CR, PPC_BIT(15));
> -
> +	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_CMD_CR, PPC_BIT(0));
> +	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_CMD_D_CR, PPC_BIT(0));
> +	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_RSP_CR, PPC_BIT(15));
> +	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_RSP_D_CR, PPC_BIT(15));
>  	return PROCEDURE_COMPLETE;
>  }
>  DEFINE_PROCEDURE(reset_npu_dl);
>
Stewart Smith Jan. 15, 2016, 4:42 a.m. UTC | #2
Russell Currey <ruscur@russell.cc> writes:
> EEH in the kernel shuts down MMIO BARs as part of freeze recovery.
> This can cause the reset procedure, which you probably want to work
> during a freeze, to fail because it can't do operations with MMIO.
> Refactor the MMIO operations to use SCOM instead.
>
> Signed-off-by: Russell Currey <ruscur@russell.cc>
> ---
>  hw/npu-hw-procedures.c | 16 +++++++---------
>  1 file changed, 7 insertions(+), 9 deletions(-)

Thanks, merged this one as of b25529d182b53be0b760638c812c422b794546e4.

I can merge the rest of the series, with any needed changes, once
Alistair has acked them.
diff mbox

Patch

diff --git a/hw/npu-hw-procedures.c b/hw/npu-hw-procedures.c
index 118ed6d..24f3b2c 100644
--- a/hw/npu-hw-procedures.c
+++ b/hw/npu-hw-procedures.c
@@ -132,13 +132,12 @@  DEFINE_PROCEDURE(nop);
  * incorporates AT reset. */
 static uint32_t reset_npu_dl(struct npu_dev *npu_dev)
 {
-	void *ntl_base = (void *) npu_dev->bar.base;
 	uint64_t val;
 
 	/* Assert NPU reset */
-	val = in_be64(ntl_base + NTL_CONTROL);
+	xscom_read(npu_dev->npu->chip_id, npu_dev->xscom + NX_NTL_CONTROL, &val);
 	val |= NTL_CONTROL_RESET;
-	out_be64(ntl_base + NTL_CONTROL, val);
+	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_NTL_CONTROL, val);
 
 	/* Put the Nvidia logic in reset */
 	dl_write(npu_dev, NDL_CONTROL, 0xe8000000);
@@ -148,14 +147,13 @@  static uint32_t reset_npu_dl(struct npu_dev *npu_dev)
 
 	/* Release NPU from reset */
 	val &= ~NTL_CONTROL_RESET;
-	out_be64(ntl_base + NTL_CONTROL, val);
+	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_NTL_CONTROL, val);
 
 	/* Setup up TL credits */
-	out_be64(ntl_base + TL_CMD_CR, PPC_BIT(0));
-	out_be64(ntl_base + TL_CMD_D_CR, PPC_BIT(0));
-	out_be64(ntl_base + TL_RSP_CR, PPC_BIT(15));
-	out_be64(ntl_base + TL_RSP_D_CR, PPC_BIT(15));
-
+	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_CMD_CR, PPC_BIT(0));
+	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_CMD_D_CR, PPC_BIT(0));
+	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_RSP_CR, PPC_BIT(15));
+	xscom_write(npu_dev->npu->chip_id, npu_dev->xscom + NX_TL_RSP_D_CR, PPC_BIT(15));
 	return PROCEDURE_COMPLETE;
 }
 DEFINE_PROCEDURE(reset_npu_dl);