diff mbox series

[v2] powerpc/pseries/eeh: Fix pseries_eeh_err_inject

Message ID 20240823151158.92602-1-nnmlinux@linux.ibm.com (mailing list archive)
State Superseded, archived
Headers show
Series [v2] powerpc/pseries/eeh: Fix pseries_eeh_err_inject | expand

Checks

Context Check Description
snowpatch_ozlabs/github-powerpc_selftests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_ppctests success Successfully ran 8 jobs.
snowpatch_ozlabs/github-powerpc_kernel_qemu success Successfully ran 21 jobs.
snowpatch_ozlabs/github-powerpc_clang success Successfully ran 5 jobs.
snowpatch_ozlabs/github-powerpc_sparse success Successfully ran 4 jobs.

Commit Message

Narayana Murty N Aug. 23, 2024, 3:11 p.m. UTC
VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
due to missing implementation of err_inject eeh_ops for pseries.
This patch implements pseries_eeh_err_inject in eeh_ops/pseries
eeh_ops. Implements support for injecting MMIO load/store error
for testing from user space.

The check on the PCI error type code is moved to platform code, since
eeh_pe_inject_err can then allow more error types depending on
platform requirements.

Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
---

Testing:
========
vfio-test [1] by Alex Williamson was forked and updated to add
support for injecting errors on a pSeries guest, and was used to test
this patch [2].

References:
===========
[1] https://github.com/awilliam/tests
[2] https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex

================
Changelog:
V1:https://lore.kernel.org/all/20240822082713.529982-1-nnmlinux@linux.ibm.com/
- Resolved build issues for ppc64|le_defconfig by moving the
pseries_eeh_err_inject() definition outside of the CONFIG_PCI_IOV
code block.
- New eeh_pe_inject_mmio_error wrapper function added to avoid
build failures when CONFIG_EEH is not set.
---
 arch/powerpc/include/asm/eeh.h               |  6 ++-
 arch/powerpc/kernel/eeh.c                    |  9 +++--
 arch/powerpc/platforms/pseries/eeh_pseries.c | 39 +++++++++++++++++++-
 3 files changed, 48 insertions(+), 6 deletions(-)

Comments

Vaibhav Jain Aug. 27, 2024, 5:03 a.m. UTC | #1
Hi Narayana,

Thanks for the patch.

Narayana Murty N <nnmlinux@linux.ibm.com> writes:

> VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
> due to missing implementation of err_inject eeh_ops for pseries.
> This patch implements pseries_eeh_err_inject in eeh_ops/pseries
> eeh_ops. Implements support for injecting MMIO load/store error
> for testing from user space.
>
> The check on PCI error type code is moved to platform code, since
> the eeh_pe_inject_err can be allowed to more error types depending
> on platform requirement.
>
> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>

Code changes LGTM, hence
Reviewed-by: Vaibhav Jain <vaibhav@linux.ibm.com>

> ---
>
> Testing:
> ========
> vfio-test [1] by Alex Willamson, was forked and updated to add
> support inject error on pSeries guest and used to test this
> patch[2].
>
> References:
> ===========
> [1] https://github.com/awilliam/tests
> [2] https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex
>
> ================
> Changelog:
> V1:https://lore.kernel.org/all/20240822082713.529982-1-nnmlinux@linux.ibm.com/
> - Resolved build issues for ppc64|le_defconfig by moving the
> pseries_eeh_err_inject() definition outside of the CONFIG_PCI_IOV
> code block.
> - New eeh_pe_inject_mmio_error wrapper function added to avoid
> CONFIG_EEH is not set.
> ---
>  arch/powerpc/include/asm/eeh.h               |  6 ++-
>  arch/powerpc/kernel/eeh.c                    |  9 +++--
>  arch/powerpc/platforms/pseries/eeh_pseries.c | 39 +++++++++++++++++++-
>  3 files changed, 48 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 91a9fd53254f..8da6b047a4fe 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -308,7 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
>  int eeh_pe_configure(struct eeh_pe *pe);
>  int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>  		      unsigned long addr, unsigned long mask);
> -
> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
>  /**
>   * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
>   *
> @@ -338,6 +338,10 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
>  	return 0;
>  }
>  
> +static inline int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
> +{
> +	return -ENXIO;
> +}
>  #define eeh_dev_check_failure(x) (0)
>  
>  static inline void eeh_addr_cache_init(void) { }
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index d03f17987fca..49ab11a287a3 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1537,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>  	if (!eeh_ops || !eeh_ops->err_inject)
>  		return -ENOENT;
>  
> -	/* Check on PCI error type */
> -	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
> -		return -EINVAL;
> -
>  	/* Check on PCI error function */
>  	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
>  		return -EINVAL;
> @@ -1851,6 +1847,11 @@ static const struct file_operations eeh_dev_break_fops = {
>  	.read   = eeh_debugfs_dev_usage,
>  };
>  
> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
> +{
> +	return eeh_debugfs_break_device(pdev);
> +}
> +
>  static ssize_t eeh_dev_can_recover(struct file *filp,
>  				   const char __user *user_buf,
>  				   size_t count, loff_t *ppos)
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index b1ae0c0d1187..1893f66371fa 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -784,6 +784,43 @@ static int pseries_notify_resume(struct eeh_dev *edev)
>  }
>  #endif
>  
> +/**
> + * pseries_eeh_err_inject - Inject specified error to the indicated PE
> + * @pe: the indicated PE
> + * @type: error type
> + * @func: specific error type
> + * @addr: address
> + * @mask: address mask
> + * The routine is called to inject specified error, which is
> + * determined by @type and @func, to the indicated PE
> + */
> +static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func,
> +				  unsigned long addr, unsigned long mask)
> +{
> +	struct	eeh_dev	*pdev;
> +
> +	/* Check on PCI error type */
> +	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
> +		return -EINVAL;
> +
> +	switch (func) {
> +	case EEH_ERR_FUNC_LD_MEM_ADDR:
> +	case EEH_ERR_FUNC_LD_MEM_DATA:
> +	case EEH_ERR_FUNC_ST_MEM_ADDR:
> +	case EEH_ERR_FUNC_ST_MEM_DATA:
> +		/* injects a MMIO error for all pdev's belonging to PE */
> +		pci_lock_rescan_remove();
> +		list_for_each_entry(pdev, &pe->edevs, entry)
> +			eeh_pe_inject_mmio_error(pdev->pdev);
> +		pci_unlock_rescan_remove();
> +		break;
> +	default:
> +		return -ERANGE;
> +	}
> +
> +	return 0;
> +}
> +
>  static struct eeh_ops pseries_eeh_ops = {
>  	.name			= "pseries",
>  	.probe			= pseries_eeh_probe,
> @@ -792,7 +829,7 @@ static struct eeh_ops pseries_eeh_ops = {
>  	.reset			= pseries_eeh_reset,
>  	.get_log		= pseries_eeh_get_log,
>  	.configure_bridge       = pseries_eeh_configure_bridge,
> -	.err_inject		= NULL,
> +	.err_inject		= pseries_eeh_err_inject,
>  	.read_config		= pseries_eeh_read_config,
>  	.write_config		= pseries_eeh_write_config,
>  	.next_error		= NULL,
> -- 
> 2.45.2
>
Mahesh J Salgaonkar Sept. 4, 2024, 9:18 a.m. UTC | #2
On 2024-08-23 10:11:58 Fri, Narayana Murty N wrote:
> VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
> due to missing implementation of err_inject eeh_ops for pseries.
> This patch implements pseries_eeh_err_inject in eeh_ops/pseries
> eeh_ops. Implements support for injecting MMIO load/store error
> for testing from user space.
> 
> The check on PCI error type code is moved to platform code, since
> the eeh_pe_inject_err can be allowed to more error types depending
> on platform requirement.
> 
> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
> ---
> 
> Testing:
> ========
> vfio-test [1] by Alex Willamson, was forked and updated to add
> support inject error on pSeries guest and used to test this
> patch[2].
> 
> References:
> ===========
> [1] https://github.com/awilliam/tests
> [2] https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex
> 
> ================
> Changelog:
> V1:https://lore.kernel.org/all/20240822082713.529982-1-nnmlinux@linux.ibm.com/
> - Resolved build issues for ppc64|le_defconfig by moving the
> pseries_eeh_err_inject() definition outside of the CONFIG_PCI_IOV
> code block.
> - New eeh_pe_inject_mmio_error wrapper function added to avoid
> CONFIG_EEH is not set.
> ---
>  arch/powerpc/include/asm/eeh.h               |  6 ++-
>  arch/powerpc/kernel/eeh.c                    |  9 +++--
>  arch/powerpc/platforms/pseries/eeh_pseries.c | 39 +++++++++++++++++++-
>  3 files changed, 48 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 91a9fd53254f..8da6b047a4fe 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -308,7 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
>  int eeh_pe_configure(struct eeh_pe *pe);
>  int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>  		      unsigned long addr, unsigned long mask);
> -
> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
>  /**
>   * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
>   *
> @@ -338,6 +338,10 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
>  	return 0;
>  }
>  
> +static inline int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
> +{
> +	return -ENXIO;
> +}
>  #define eeh_dev_check_failure(x) (0)
>  
>  static inline void eeh_addr_cache_init(void) { }
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index d03f17987fca..49ab11a287a3 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1537,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>  	if (!eeh_ops || !eeh_ops->err_inject)
>  		return -ENOENT;
>  
> -	/* Check on PCI error type */
> -	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
> -		return -EINVAL;
> -
>  	/* Check on PCI error function */
>  	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
>  		return -EINVAL;
> @@ -1851,6 +1847,11 @@ static const struct file_operations eeh_dev_break_fops = {
>  	.read   = eeh_debugfs_dev_usage,
>  };
>  
> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
> +{
> +	return eeh_debugfs_break_device(pdev);
> +}
> +
>  static ssize_t eeh_dev_can_recover(struct file *filp,
>  				   const char __user *user_buf,
>  				   size_t count, loff_t *ppos)
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index b1ae0c0d1187..1893f66371fa 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -784,6 +784,43 @@ static int pseries_notify_resume(struct eeh_dev *edev)
>  }
>  #endif
>  
> +/**
> + * pseries_eeh_err_inject - Inject specified error to the indicated PE
> + * @pe: the indicated PE
> + * @type: error type
> + * @func: specific error type
> + * @addr: address
> + * @mask: address mask
> + * The routine is called to inject specified error, which is
> + * determined by @type and @func, to the indicated PE
> + */
> +static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func,
> +				  unsigned long addr, unsigned long mask)
> +{
> +	struct	eeh_dev	*pdev;
> +
> +	/* Check on PCI error type */
> +	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
> +		return -EINVAL;
> +
> +	switch (func) {
> +	case EEH_ERR_FUNC_LD_MEM_ADDR:
> +	case EEH_ERR_FUNC_LD_MEM_DATA:
> +	case EEH_ERR_FUNC_ST_MEM_ADDR:
> +	case EEH_ERR_FUNC_ST_MEM_DATA:
> +		/* injects a MMIO error for all pdev's belonging to PE */
> +		pci_lock_rescan_remove();
> +		list_for_each_entry(pdev, &pe->edevs, entry)
> +			eeh_pe_inject_mmio_error(pdev->pdev);
> +		pci_unlock_rescan_remove();


Changes look good to me.

Reviewed-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>

I have tested this with your VFIO changes at https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex

Tested-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>

Thanks,
-Mahesh.
Michael Ellerman Sept. 5, 2024, 1:03 p.m. UTC | #3
Narayana Murty N <nnmlinux@linux.ibm.com> writes:
> VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
> due to missing implementation of err_inject eeh_ops for pseries.
> This patch implements pseries_eeh_err_inject in eeh_ops/pseries
> eeh_ops. Implements support for injecting MMIO load/store error
> for testing from user space.
>
> The check on PCI error type code is moved to platform code, since
> the eeh_pe_inject_err can be allowed to more error types depending
> on platform requirement.
>
> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
> ---
>
> Testing:
> ========
> vfio-test [1] by Alex Willamson, was forked and updated to add
> support inject error on pSeries guest and used to test this
> patch[2].
>
> References:
> ===========
> [1] https://github.com/awilliam/tests
> [2] https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex
>
> ================
> Changelog:
> V1:https://lore.kernel.org/all/20240822082713.529982-1-nnmlinux@linux.ibm.com/
> - Resolved build issues for ppc64|le_defconfig by moving the
> pseries_eeh_err_inject() definition outside of the CONFIG_PCI_IOV
> code block.
> - New eeh_pe_inject_mmio_error wrapper function added to avoid
> CONFIG_EEH is not set.
 
I don't see why that's necessary?

It's only called from eeh_pseries.c, which is only built for
PPC_PSERIES, and when PPC_PSERIES=y, EEH is always enabled.

> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 91a9fd53254f..8da6b047a4fe 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -308,7 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
>  int eeh_pe_configure(struct eeh_pe *pe);
>  int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>  		      unsigned long addr, unsigned long mask);
> -
> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
>  /**
>   * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
>   *
> @@ -338,6 +338,10 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
>  	return 0;
>  }
>  
> +static inline int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
> +{
> +	return -ENXIO;
> +}
>  #define eeh_dev_check_failure(x) (0)
>  
>  static inline void eeh_addr_cache_init(void) { }
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index d03f17987fca..49ab11a287a3 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1537,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>  	if (!eeh_ops || !eeh_ops->err_inject)
>  		return -ENOENT;
>  
> -	/* Check on PCI error type */
> -	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
> -		return -EINVAL;
> -
 
The change log should mention why it's OK to remove these checks. You
add the same checks in pseries_eeh_err_inject(), but what about
pnv_eeh_err_inject() ?

It is OK AFAICS, because pnv_eeh_err_inject() already contains
equivalent checks, but you should spell that out.

cheers
Narayana Murty N Sept. 9, 2024, 2:04 p.m. UTC | #4
On 05/09/24 6:33 PM, Michael Ellerman wrote:
> Narayana Murty N <nnmlinux@linux.ibm.com> writes:
>> VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
>> due to missing implementation of err_inject eeh_ops for pseries.
>> This patch implements pseries_eeh_err_inject in eeh_ops/pseries
>> eeh_ops. Implements support for injecting MMIO load/store error
>> for testing from user space.
>>
>> The check on PCI error type code is moved to platform code, since
>> the eeh_pe_inject_err can be allowed to more error types depending
>> on platform requirement.
>>
>> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
>> ---
>>
>> Testing:
>> ========
>> vfio-test [1] by Alex Willamson, was forked and updated to add
>> support inject error on pSeries guest and used to test this
>> patch[2].
>>
>> References:
>> ===========
>> [1] https://github.com/awilliam/tests
>> [2] https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex
>>
>> ================
>> Changelog:
>> V1:https://lore.kernel.org/all/20240822082713.529982-1-nnmlinux@linux.ibm.com/
>> - Resolved build issues for ppc64|le_defconfig by moving the
>> pseries_eeh_err_inject() definition outside of the CONFIG_PCI_IOV
>> code block.
>> - New eeh_pe_inject_mmio_error wrapper function added to avoid
>> CONFIG_EEH is not set.
>   
> I don't see why that's necessary?
>
> It's only called from eeh_pseries.c, which is only built for
> PPC_PSERIES, and when PPC_PSERIES=y, EEH is always enabled.
>
>> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>> index 91a9fd53254f..8da6b047a4fe 100644
>> --- a/arch/powerpc/include/asm/eeh.h
>> +++ b/arch/powerpc/include/asm/eeh.h
>> @@ -308,7 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
>>   int eeh_pe_configure(struct eeh_pe *pe);
>>   int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>>   		      unsigned long addr, unsigned long mask);
>> -
>> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
>>   /**
>>    * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
>>    *
>> @@ -338,6 +338,10 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
>>   	return 0;
>>   }
>>   
>> +static inline int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
>> +{
>> +	return -ENXIO;
>> +}
>>   #define eeh_dev_check_failure(x) (0)
>>   
>>   static inline void eeh_addr_cache_init(void) { }
>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>> index d03f17987fca..49ab11a287a3 100644
>> --- a/arch/powerpc/kernel/eeh.c
>> +++ b/arch/powerpc/kernel/eeh.c
>> @@ -1537,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>>   	if (!eeh_ops || !eeh_ops->err_inject)
>>   		return -ENOENT;
>>   
>> -	/* Check on PCI error type */
>> -	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
>> -		return -EINVAL;
>> -
>   
> The change log should mention why it's OK to remove these checks. You
> add the same checks in pseries_eeh_err_inject(), but what about
> pnv_eeh_err_inject() ?
>
> It is OK AFAICS, because pnv_eeh_err_inject() already contains
> equivalent checks, but you should spell that out.
>
> cheers

Yes mpe, I agree. Your comments are addressed in V3, posted here:
https://lore.kernel.org/all/20240909140220.529333-1-nnmlinux@linux.ibm.com/

regards,

Narayana Murty.
Michael Ellerman Sept. 10, 2024, 7:22 a.m. UTC | #5
Narayana Murty N <nnmlinux@linux.ibm.com> writes:
> On 05/09/24 6:33 PM, Michael Ellerman wrote:
>> Narayana Murty N <nnmlinux@linux.ibm.com> writes:
>>> VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
>>> due to missing implementation of err_inject eeh_ops for pseries.
>>> This patch implements pseries_eeh_err_inject in eeh_ops/pseries
>>> eeh_ops. Implements support for injecting MMIO load/store error
>>> for testing from user space.
>>>
>>> The check on PCI error type code is moved to platform code, since
>>> the eeh_pe_inject_err can be allowed to more error types depending
>>> on platform requirement.
>>>
>>> Signed-off-by: Narayana Murty N <nnmlinux@linux.ibm.com>
>>> ---
>>>
>>> Testing:
>>> ========
>>> vfio-test [1] by Alex Willamson, was forked and updated to add
>>> support inject error on pSeries guest and used to test this
>>> patch[2].
>>>
>>> References:
>>> ===========
>>> [1] https://github.com/awilliam/tests
>>> [2] https://github.com/nnmwebmin/vfio-ppc-tests/tree/vfio-ppc-ex
>>>
>>> ================
>>> Changelog:
>>> V1:https://lore.kernel.org/all/20240822082713.529982-1-nnmlinux@linux.ibm.com/
>>> - Resolved build issues for ppc64|le_defconfig by moving the
>>> pseries_eeh_err_inject() definition outside of the CONFIG_PCI_IOV
>>> code block.
>>> - New eeh_pe_inject_mmio_error wrapper function added to avoid
>>> CONFIG_EEH is not set.
>>   
>> I don't see why that's necessary?
>>
>> It's only called from eeh_pseries.c, which is only built for
>> PPC_PSERIES, and when PPC_PSERIES=y, EEH is always enabled.
>>
>>> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>> index 91a9fd53254f..8da6b047a4fe 100644
>>> --- a/arch/powerpc/include/asm/eeh.h
>>> +++ b/arch/powerpc/include/asm/eeh.h
>>> @@ -308,7 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
>>>   int eeh_pe_configure(struct eeh_pe *pe);
>>>   int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>>>   		      unsigned long addr, unsigned long mask);
>>> -
>>> +int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
>>>   /**
>>>    * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
>>>    *
>>> @@ -338,6 +338,10 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
>>>   	return 0;
>>>   }
>>>   
>>> +static inline int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
>>> +{
>>> +	return -ENXIO;
>>> +}
>>>   #define eeh_dev_check_failure(x) (0)
>>>   
>>>   static inline void eeh_addr_cache_init(void) { }
>>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>> index d03f17987fca..49ab11a287a3 100644
>>> --- a/arch/powerpc/kernel/eeh.c
>>> +++ b/arch/powerpc/kernel/eeh.c
>>> @@ -1537,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>>>   	if (!eeh_ops || !eeh_ops->err_inject)
>>>   		return -ENOENT;
>>>   
>>> -	/* Check on PCI error type */
>>> -	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
>>> -		return -EINVAL;
>>> -
>>   
>> The change log should mention why it's OK to remove these checks. You
>> add the same checks in pseries_eeh_err_inject(), but what about
>> pnv_eeh_err_inject() ?
>>
>> It is OK AFAICS, because pnv_eeh_err_inject() already contains
>> equivalent checks, but you should spell that out.
>>
>> cheers
>
> yes mpe. I do agree, your comments are addressed in V3 posted
>
> here 
> https://lore.kernel.org/all/20240909140220.529333-1-nnmlinux@linux.ibm.com/

Thanks.

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 91a9fd53254f..8da6b047a4fe 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -308,7 +308,7 @@  int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
 int eeh_pe_configure(struct eeh_pe *pe);
 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
 		      unsigned long addr, unsigned long mask);
-
+int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
  *
@@ -338,6 +338,10 @@  static inline int eeh_check_failure(const volatile void __iomem *token)
 	return 0;
 }
 
+static inline int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
+{
+	return -ENXIO;
+}
 #define eeh_dev_check_failure(x) (0)
 
 static inline void eeh_addr_cache_init(void) { }
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index d03f17987fca..49ab11a287a3 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1537,10 +1537,6 @@  int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
 	if (!eeh_ops || !eeh_ops->err_inject)
 		return -ENOENT;
 
-	/* Check on PCI error type */
-	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
-		return -EINVAL;
-
 	/* Check on PCI error function */
 	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
 		return -EINVAL;
@@ -1851,6 +1847,11 @@  static const struct file_operations eeh_dev_break_fops = {
 	.read   = eeh_debugfs_dev_usage,
 };
 
+int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
+{
+	return eeh_debugfs_break_device(pdev);
+}
+
 static ssize_t eeh_dev_can_recover(struct file *filp,
 				   const char __user *user_buf,
 				   size_t count, loff_t *ppos)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b1ae0c0d1187..1893f66371fa 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -784,6 +784,43 @@  static int pseries_notify_resume(struct eeh_dev *edev)
 }
 #endif
 
+/**
+ * pseries_eeh_err_inject - Inject specified error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: specific error type
+ * @addr: address
+ * @mask: address mask
+ * The routine is called to inject specified error, which is
+ * determined by @type and @func, to the indicated PE
+ */
+static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func,
+				  unsigned long addr, unsigned long mask)
+{
+	struct	eeh_dev	*pdev;
+
+	/* Check on PCI error type */
+	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
+		return -EINVAL;
+
+	switch (func) {
+	case EEH_ERR_FUNC_LD_MEM_ADDR:
+	case EEH_ERR_FUNC_LD_MEM_DATA:
+	case EEH_ERR_FUNC_ST_MEM_ADDR:
+	case EEH_ERR_FUNC_ST_MEM_DATA:
+		/* injects a MMIO error for all pdev's belonging to PE */
+		pci_lock_rescan_remove();
+		list_for_each_entry(pdev, &pe->edevs, entry)
+			eeh_pe_inject_mmio_error(pdev->pdev);
+		pci_unlock_rescan_remove();
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
 static struct eeh_ops pseries_eeh_ops = {
 	.name			= "pseries",
 	.probe			= pseries_eeh_probe,
@@ -792,7 +829,7 @@  static struct eeh_ops pseries_eeh_ops = {
 	.reset			= pseries_eeh_reset,
 	.get_log		= pseries_eeh_get_log,
 	.configure_bridge       = pseries_eeh_configure_bridge,
-	.err_inject		= NULL,
+	.err_inject		= pseries_eeh_err_inject,
 	.read_config		= pseries_eeh_read_config,
 	.write_config		= pseries_eeh_write_config,
 	.next_error		= NULL,