diff mbox

[V2,2/4] nvlink: Add fence mode emulation for NPUs

Message ID 1453096782-8398-2-git-send-email-ruscur@russell.cc
State Accepted
Headers show

Commit Message

Russell Currey Jan. 18, 2016, 5:59 a.m. UTC
As NPUs are emulated PCI devices, they do not get physically fenced as real
PCI devices do.  As such, when the device is in a state in which it should be
fenced, we need to emulate this behaviour by returning all 1s on config
space reads.

This will be utilised by error injection in subsequent patches.

Signed-off-by: Russell Currey <ruscur@russell.cc>
---
V2: Clarify fence comment and move freeze status change to this patch
---
 hw/npu.c      | 15 +++++++++++++--
 include/npu.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

Comments

Alistair Popple Jan. 20, 2016, 4:36 a.m. UTC | #1
I like the clarification, thanks.

Acked-by: Alistair Popple <alistair@popple.id.au>

On Mon, 18 Jan 2016 16:59:40 Russell Currey wrote:
> As NPUs are emulated PCI devices, they do not get physically fenced as real
> PCI devices do.  As such, when the device is in a state that it should be
> fenced, we need to emulate this behaviour by returning all 1s in config
> space reads.
> 
> This will be utilised by error injection in subsequent patches.
> 
> Signed-off-by: Russell Currey <ruscur@russell.cc>
> ---
> V2: Clarify fence comment and move freeze status change to this patch
> ---
>  hw/npu.c      | 15 +++++++++++++--
>  include/npu.h |  1 +
>  2 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/npu.c b/hw/npu.c
> index 3a06c04..3c2c0b8 100644
> --- a/hw/npu.c
> +++ b/hw/npu.c
> @@ -390,6 +390,10 @@ static int64_t _npu_dev_cfg_read(struct phb *phb, uint32_t bdfn,
>  	/* Data returned upon errors */
>  	*data = 0xffffffff;
>  
> +	/* If fenced, we want to return all 1s, so we're done. */
> +	if (p->fenced)
> +		return OPAL_SUCCESS;
> +
>  	/* Retrieve NPU device */
>  	dev = npu_dev_cfg_check(p, bdfn, offset, size);
>  	if (!dev)
> @@ -996,7 +1000,7 @@ static int64_t npu_freset(struct phb *phb __unused)
>  	return OPAL_SUCCESS;
>  }
>  
> -static int64_t npu_freeze_status(struct phb *phb __unused,
> +static int64_t npu_freeze_status(struct phb *phb,
>  				     uint64_t pe_number __unused,
>  				     uint8_t *freeze_state,
>  				     uint16_t *pci_error_type __unused,
> @@ -1008,7 +1012,11 @@ static int64_t npu_freeze_status(struct phb *phb __unused,
>  	 * introduce another PHB callback to translate it. For now,
>  	 * it keeps the skiboot PCI enumeration going.
>  	 */
> -	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
> +	struct npu *p = phb_to_npu(phb);
> +	if (p->fenced)
> +		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
> +	else
> +		*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
>  	return OPAL_SUCCESS;
>  }
>  
> @@ -1680,6 +1688,9 @@ static void npu_create_phb(struct dt_node *dn)
>  	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
>  	p->total_devices = links;
>  
> +	/* TODO: When hardware fences are implemented, detect them here */
> +	p->fenced = false;
> +
>  	/* This is the AT base */
>  	p->at_xscom = p->xscom_base + NPU_AT_SCOM_OFFSET;
>  	p->at_regs = (void *)dt_get_address(dn, 0, NULL);
> diff --git a/include/npu.h b/include/npu.h
> index 795b704..5d5135b 100644
> --- a/include/npu.h
> +++ b/include/npu.h
> @@ -167,6 +167,7 @@ struct npu {
>  	uint64_t		tve_cache[NPU_NUM_OF_PES];
>  
>  	bool			tx_zcal_complete[2];
> +	bool			fenced;
>  
>  	struct phb		phb;
>  };
>
diff mbox

Patch

diff --git a/hw/npu.c b/hw/npu.c
index 3a06c04..3c2c0b8 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -390,6 +390,10 @@  static int64_t _npu_dev_cfg_read(struct phb *phb, uint32_t bdfn,
 	/* Data returned upon errors */
 	*data = 0xffffffff;
 
+	/* If fenced, we want to return all 1s, so we're done. */
+	if (p->fenced)
+		return OPAL_SUCCESS;
+
 	/* Retrieve NPU device */
 	dev = npu_dev_cfg_check(p, bdfn, offset, size);
 	if (!dev)
@@ -996,7 +1000,7 @@  static int64_t npu_freset(struct phb *phb __unused)
 	return OPAL_SUCCESS;
 }
 
-static int64_t npu_freeze_status(struct phb *phb __unused,
+static int64_t npu_freeze_status(struct phb *phb,
 				     uint64_t pe_number __unused,
 				     uint8_t *freeze_state,
 				     uint16_t *pci_error_type __unused,
@@ -1008,7 +1012,11 @@  static int64_t npu_freeze_status(struct phb *phb __unused,
 	 * introduce another PHB callback to translate it. For now,
 	 * it keeps the skiboot PCI enumeration going.
 	 */
-	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
+	struct npu *p = phb_to_npu(phb);
+	if (p->fenced)
+		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
+	else
+		*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
 	return OPAL_SUCCESS;
 }
 
@@ -1680,6 +1688,9 @@  static void npu_create_phb(struct dt_node *dn)
 	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
 	p->total_devices = links;
 
+	/* TODO: When hardware fences are implemented, detect them here */
+	p->fenced = false;
+
 	/* This is the AT base */
 	p->at_xscom = p->xscom_base + NPU_AT_SCOM_OFFSET;
 	p->at_regs = (void *)dt_get_address(dn, 0, NULL);
diff --git a/include/npu.h b/include/npu.h
index 795b704..5d5135b 100644
--- a/include/npu.h
+++ b/include/npu.h
@@ -167,6 +167,7 @@  struct npu {
 	uint64_t		tve_cache[NPU_NUM_OF_PES];
 
 	bool			tx_zcal_complete[2];
+	bool			fenced;
 
 	struct phb		phb;
 };