diff mbox series

[v9,4/6] iommu/arm-smmu-v3: Add CS_NONE quirk for CONFIG_TEGRA241_CMDQV

Message ID d20dc9939523fac490bc02e57d7836f680916a36.1718228494.git.nicolinc@nvidia.com
State Handled Elsewhere
Headers show
Series Add Tegra241 (Grace) CMDQV Support (part 1/2) | expand

Commit Message

Nicolin Chen June 12, 2024, 9:45 p.m. UTC
The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
CS field of CMD_SYNC. Add a quirk flag to accommodate that.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

Comments

Will Deacon July 2, 2024, 5:43 p.m. UTC | #1
On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote:
> The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
> CS field of CMD_SYNC. Add a quirk flag to accommodate that.
> 
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++
>  2 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index c864c634cd23..ba0e24d5ffbf 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>  		 FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
>  		 FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
>  
> +	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
> +		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> +		return;
> +	}
> +
>  	if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
>  		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
>  		return;
> @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
>  					 struct arm_smmu_cmdq *cmdq,
>  					 struct arm_smmu_ll_queue *llq)
>  {
> -	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> +	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> +	    !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
>  		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
>  
>  	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 180c0b1e0658..01227c0de290 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -543,6 +543,9 @@ struct arm_smmu_queue {
>  
>  	u32 __iomem			*prod_reg;
>  	u32 __iomem			*cons_reg;
> +
> +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY	BIT(0)	/* CMD_SYNC CS field supports CS_NONE only */
> +	u32				quirks;

Please can you use the existing smmu->options field instead of adding
another place to track quirks? Or do you need this only for some of the
queues for a given SMMU device?

Thanks,

Will
Nicolin Chen July 2, 2024, 6:19 p.m. UTC | #2
Hi Will,

On Tue, Jul 02, 2024 at 06:43:07PM +0100, Will Deacon wrote:
> On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote:
> > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
> > CS field of CMD_SYNC. Add a quirk flag to accommodate that.
> >
> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++-
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++
> >  2 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index c864c634cd23..ba0e24d5ffbf 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> >                FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> >                FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> >
> > +     if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
> > +             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > +             return;
> > +     }
> > +
> >       if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> >               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> >               return;
> > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> >                                        struct arm_smmu_cmdq *cmdq,
> >                                        struct arm_smmu_ll_queue *llq)
> >  {
> > -     if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > +     if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > +         !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
> >               return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> >
> >       return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > index 180c0b1e0658..01227c0de290 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > @@ -543,6 +543,9 @@ struct arm_smmu_queue {
> >
> >       u32 __iomem                     *prod_reg;
> >       u32 __iomem                     *cons_reg;
> > +
> > +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0)  /* CMD_SYNC CS field supports CS_NONE only */
> > +     u32                             quirks;
> 
> Please can you use the existing smmu->options field instead of adding
> another place to track quirks? Or do you need this only for some of the
> queues for a given SMMU device?

VCMDQs are an extension of a regular SMMU (with its own CMDQ). So,
the SMMU CMDQ still supports SIG_IRQ for the CS field, while VCMDQs
can only support SIG_NONE. In other words, this quirk is not
per SMMU but per queue.

I can highlight this in the commit message, if that would make
it clear.

Thanks
Nicolin
Will Deacon July 2, 2024, 6:49 p.m. UTC | #3
On Tue, Jul 02, 2024 at 11:19:56AM -0700, Nicolin Chen wrote:
> Hi Will,
> 
> On Tue, Jul 02, 2024 at 06:43:07PM +0100, Will Deacon wrote:
> > On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote:
> > > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
> > > CS field of CMD_SYNC. Add a quirk flag to accommodate that.
> > >
> > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> > > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > > ---
> > >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++-
> > >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++
> > >  2 files changed, 10 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > index c864c634cd23..ba0e24d5ffbf 100644
> > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > >                FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> > >                FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > >
> > > +     if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
> > > +             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > > +             return;
> > > +     }
> > > +
> > >       if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > >               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > >               return;
> > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> > >                                        struct arm_smmu_cmdq *cmdq,
> > >                                        struct arm_smmu_ll_queue *llq)
> > >  {
> > > -     if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > > +     if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > > +         !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
> > >               return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> > >
> > >       return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
> > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > index 180c0b1e0658..01227c0de290 100644
> > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > @@ -543,6 +543,9 @@ struct arm_smmu_queue {
> > >
> > >       u32 __iomem                     *prod_reg;
> > >       u32 __iomem                     *cons_reg;
> > > +
> > > +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0)  /* CMD_SYNC CS field supports CS_NONE only */
> > > +     u32                             quirks;
> > 
> > Please can you use the existing smmu->options field instead of adding
> > another place to track quirks? Or do you need this only for some of the
> > queues for a given SMMU device?
> 
> VCMDQs are extension of a regular SMMU (with its own CMDQ). So,
> SMMU CMDQ still supports SIG_IRQ for the CS field, while VCMDQs
> could only support SIG_NONE. In another word, this quirk is not
> per SMMU but per Queue.
> 
> I can highlight this in the commit message, if that would make
> it clear.

I think we could still use smmu->options and have something like
ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY which could be applied
when the queue is != arm_smmu_get_cmdq(smmu).

Will
Nicolin Chen July 2, 2024, 7:47 p.m. UTC | #4
On Tue, Jul 02, 2024 at 07:49:42PM +0100, Will Deacon wrote:
> On Tue, Jul 02, 2024 at 11:19:56AM -0700, Nicolin Chen wrote:
> > Hi Will,
> >
> > On Tue, Jul 02, 2024 at 06:43:07PM +0100, Will Deacon wrote:
> > > On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote:
> > > > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
> > > > CS field of CMD_SYNC. Add a quirk flag to accommodate that.
> > > >
> > > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> > > > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > > > ---
> > > >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++-
> > > >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++
> > > >  2 files changed, 10 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > > index c864c634cd23..ba0e24d5ffbf 100644
> > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > > >                FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> > > >                FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > > >
> > > > +     if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
> > > > +             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > > > +             return;
> > > > +     }
> > > > +
> > > >       if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > > >               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > > >               return;
> > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> > > >                                        struct arm_smmu_cmdq *cmdq,
> > > >                                        struct arm_smmu_ll_queue *llq)
> > > >  {
> > > > -     if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > > > +     if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > > > +         !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
> > > >               return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> > > >
> > > >       return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
> > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > > index 180c0b1e0658..01227c0de290 100644
> > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > > @@ -543,6 +543,9 @@ struct arm_smmu_queue {
> > > >
> > > >       u32 __iomem                     *prod_reg;
> > > >       u32 __iomem                     *cons_reg;
> > > > +
> > > > +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0)  /* CMD_SYNC CS field supports CS_NONE only */
> > > > +     u32                             quirks;
> > >
> > > Please can you use the existing smmu->options field instead of adding
> > > another place to track quirks? Or do you need this only for some of the
> > > queues for a given SMMU device?
> >
> > VCMDQs are extension of a regular SMMU (with its own CMDQ). So,
> > SMMU CMDQ still supports SIG_IRQ for the CS field, while VCMDQs
> > could only support SIG_NONE. In another word, this quirk is not
> > per SMMU but per Queue.
> >
> > I can highlight this in the commit message, if that would make
> > it clear.
> 
> I think we could still use smmu->options and have something like
> ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY which could be applied
> when the queue is != arm_smmu_get_cmdq(smmu).

A queue can be cmdq, ecmdq, vcmdq. Only VCMDQ has such a quirk.
So arm_smmu_get_cmdq(smmu) is unlikely to work if we add
ECMDQ later. Also, ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY is
very ambiguous IMHO. What we need is to check clearly if VCMDQ
is being used, so that leaves us an alternative:
--------------------------------------------------------------
enum arm_smmu_cmdq_type {
	ARM_SMMU_CMDQ,
	ARM_SMMU_ECMDQ,
	TEGRA241_VCMDQ,
};

@@ -543,6 +543,9 @@ struct arm_smmu_queue {

 	u32 __iomem			*prod_reg;
 	u32 __iomem			*cons_reg;
+
+	enum arm_smmu_cmdq_type		type;
 };

 struct arm_smmu_queue_poll {
@@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 		FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
 		FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
 
+	if (cmdq->type == TEGRA241_VCMDQ) {
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+		return;
+	}
+
	if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		return;
@@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					struct arm_smmu_cmdq *cmdq,
 					struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
+	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+	    cmdq->type != TEGRA241_VCMDQ)
 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
 
--------------------------------------------------------------

Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
is more general looking though..

Thanks
Nicolin
Nicolin Chen July 2, 2024, 8:10 p.m. UTC | #5
On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote:
> @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>  		FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
>  		FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
>  
> +	if (cmdq->type == TEGRA241_VCMDQ) {
> +		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> +		return;
> +	}
> +
> 	if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> 		return;
> @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
>  					struct arm_smmu_cmdq *cmdq,
>  					struct arm_smmu_ll_queue *llq)
>  {
> -	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> +	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> +	    cmdq->type != TEGRA241_VCMDQ) {
>  		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
>  
> --------------------------------------------------------------
> 
> Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
> is more general looking though..

And we would need some additional lines of comments for the two
pieces above, explaining why TEGRA241_VCMDQ type needs the first
one while bypasses the second one. Again, it feels even worse :(

Thanks
Nicolin
Will Deacon July 5, 2024, 3:27 p.m. UTC | #6
On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote:
> On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote:
> > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> >  		FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> >  		FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> >  
> > +	if (cmdq->type == TEGRA241_VCMDQ) {
> > +		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > +		return;
> > +	}
> > +
> > 	if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > 		return;
> > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> >  					struct arm_smmu_cmdq *cmdq,
> >  					struct arm_smmu_ll_queue *llq)
> >  {
> > -	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > +	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > +	    cmdq->type != TEGRA241_VCMDQ) {
> >  		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> >  
> > --------------------------------------------------------------
> > 
> > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
> > is more general looking though..
> 
> And we would need some additional lines of comments for the two
> pieces above, explaining why TEGRA241_VCMDQ type needs the first
> one while bypasses the second one. Again, it feels even worse :(

I hacked the code around a bit this afternoon. Please can you see if:

https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip

does roughly what you need?

Will
Nicolin Chen July 5, 2024, 6:10 p.m. UTC | #7
Hi Will,

On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote:
> On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote:
> > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote:
> > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > >             FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> > >             FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > >
> > > +   if (cmdq->type == TEGRA241_VCMDQ) {
> > > +           cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > > +           return;
> > > +   }
> > > +
> > >     if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > >             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > >             return;
> > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> > >                                     struct arm_smmu_cmdq *cmdq,
> > >                                     struct arm_smmu_ll_queue *llq)
> > >  {
> > > -   if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > > +   if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > > +       cmdq->type != TEGRA241_VCMDQ) {
> > >             return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> > >
> > > --------------------------------------------------------------
> > >
> > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
> > > is more general looking though..
> >
> > And we would need some additional lines of comments for the two
> > pieces above, explaining why TEGRA241_VCMDQ type needs the first
> > one while bypasses the second one. Again, it feels even worse :(
> 
> I hacked the code around a bit this afternoon. Please can you see if:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip
> 
> does roughly what you need?

I appreciate the patch. Yet, we cannot use IORT's model field.
This would need to go through IORT documentation, for A. And B,
we had a very long discussion with ARM (Robin was there) years
ago, and concluded that this CMDQV would not be a model in IORT
but a DSDT node as an extension. So, this is firm...

With that, we cannot avoid an unconditionally hard-coded tegra
function call even if we switch to an impl design:

+static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu)
+{
+	/*
+	 * Unconditionally go through the ACPI table to detect whether there is
+	 * a tegra241 implementation that extends the SMMU with a CMDQV. The
+	 * probe() will fill the smmu->impl pointer upon success. Otherwise,
+	 * fall back to the regular SMMU CMDQ.
+	 */
+	tegra241_impl_acpi_probe(smmu);
+	return 0;
+}

As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look
very optimal to me. But if you insist on having an smmu option,
we still have to take in the PATCH-3 in this series, enforcing
an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too.
So, it would eventually look like [attachment].

Thanks!
Nicolin
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3e2eb88535de..e57ea8d39c98 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -352,15 +352,26 @@ arm_smmu_get_cmdq(struct arm_smmu_device *smmu, u8 opcode)
 	return &smmu->cmdq;
 }
 
+static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
+					     struct arm_smmu_cmdq *cmdq)
+{
+	if (cmdq == &smmu->cmdq)
+		return false;
+
+	return smmu->options & ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY;
+}
+
 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
-					 struct arm_smmu_queue *q, u32 prod)
+					 struct arm_smmu_cmdq *cmdq, u32 prod)
 {
+	struct arm_smmu_queue *q = &cmdq->q;
+
 	cmd[1] = 0;
 	cmd[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
 		 FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
 		 FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
 
-	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
+	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) {
 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
 		return;
 	}
@@ -380,7 +391,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 }
 
 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
-			      struct arm_smmu_queue *q)
+			      struct arm_smmu_cmdq *cmdq)
 {
 	static const char * const cerror_str[] = {
 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
@@ -388,6 +399,7 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
 	};
+	struct arm_smmu_queue *q = &cmdq->q;
 
 	int i;
 	u64 cmd[CMDQ_ENT_DWORDS];
@@ -426,14 +438,14 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 
 	/* Convert the erroneous command into a CMD_SYNC */
-	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons);
+	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, cmdq, cons);
 
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 {
-	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
 }
 
 /*
@@ -711,7 +723,7 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_ll_queue *llq)
 {
 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
-	    !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
+	    !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
 
 	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
@@ -797,7 +809,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 	if (sync) {
 		prod = queue_inc_prod_n(&llq, n);
-		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 
 		/*
@@ -3985,6 +3997,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
 
 	smmu->tegra241_cmdqv = tegra241_cmdqv_acpi_probe(smmu, node);
+	if (smmu->tegra241_cmdqv)
+		smmu->options |= ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY;
 
 	return 0;
 }
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 2c1fe7e129cd..0962aa839080 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -654,10 +654,11 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_ATTR_TYPES_OVR	(1 << 20)
 	u32				features;
 
-#define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
-#define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
-#define ARM_SMMU_OPT_MSIPOLL		(1 << 2)
-#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC	(1 << 3)
+#define ARM_SMMU_OPT_SKIP_PREFETCH			(1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY			(1 << 1)
+#define ARM_SMMU_OPT_MSIPOLL				(1 << 2)
+#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC			(1 << 3)
+#define ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY	(1 << 4)
 	u32				options;
 
 	struct arm_smmu_cmdq		cmdq;
@@ -805,7 +806,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 			    unsigned long iova, size_t size);
 
 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
-			      struct arm_smmu_queue *q);
+			      struct arm_smmu_cmdq *cmdq);
 int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 			    struct arm_smmu_queue *q, void __iomem *page,
 			    unsigned long prod_off, unsigned long cons_off,
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index bb696c66e56d..4b1de8517bec 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -266,7 +266,7 @@ static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf)
 			u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
 
 			__arm_smmu_cmdq_skip_err(vintf->cmdqv->smmu,
-						 &vcmdq->cmdq.q);
+						 &vcmdq->cmdq);
 			writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
 			map &= ~BIT_ULL(lidx);
 		}
Nicolin Chen July 6, 2024, 12:32 a.m. UTC | #8
On Fri, Jul 05, 2024 at 11:10:47AM -0700, Nicolin Chen wrote:
> Hi Will,
> 
> On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote:
> > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote:
> > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote:
> > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > > >             FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> > > >             FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > > >
> > > > +   if (cmdq->type == TEGRA241_VCMDQ) {
> > > > +           cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > > > +           return;
> > > > +   }
> > > > +
> > > >     if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > > >             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > > >             return;
> > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> > > >                                     struct arm_smmu_cmdq *cmdq,
> > > >                                     struct arm_smmu_ll_queue *llq)
> > > >  {
> > > > -   if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > > > +   if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > > > +       cmdq->type != TEGRA241_VCMDQ) {
> > > >             return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> > > >
> > > > --------------------------------------------------------------
> > > >
> > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
> > > > is more general looking though..
> > >
> > > And we would need some additional lines of comments for the two
> > > pieces above, explaining why TEGRA241_VCMDQ type needs the first
> > > one while bypasses the second one. Again, it feels even worse :(
> > 
> > I hacked the code around a bit this afternoon. Please can you see if:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip
> > 
> > does roughly what you need?
> 
> I appreciate the patch. Yet, we cannot use IORT's model field.
> This would need to go through IORT documentation, for A. And B,
> we had a very long discussion with ARM (Robin was there) years
> ago, and concluded that this CMDQV would not be a model in IORT
> but a DSDT node as an extension. So, this is firm...
> 
> With that, we cannot avoid an unconditional hard-coding tegra
> function call even if we switch to an impl design:
> 
> +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu)
> +{
> +	/*
> +	 * unconditional go through ACPI table to detect if there is a tegra241
> +	 * implementation that extends SMMU with a CMDQV. The probe() will fill
> +	 * the smmu->impl pointer upon success. Otherwise, fall back to regular
> +	 * SMMU CMDQ.
> +	 */
> +	tegra241_impl_acpi_probe(smmu);
> +	return 0;
> +}
> 
> As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look
> very optimal to me. But if you insist on having an smmu option,
> we still have to take in the PATCH-3 in this series, enforcing
> an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too.
> So, it would eventually look like [attachment].

Please ignore the attachment. Since we are adding arm_smmu_impl,
I figure that we could add an arm_smmu_cmdq_impl too. There's an
another small feature that I didn't implement in this v9, while
being able to benefit from a cmdq impl now.

The impl can also hold a boolean busy_polling, so we won't need
a global smmu option.

I will send a new version asap, though I am not sure if we can
still make it to this cycle that we hoped for :-/

Thanks
Nicolin
Will Deacon July 8, 2024, 11:29 a.m. UTC | #9
On Fri, Jul 05, 2024 at 11:10:42AM -0700, Nicolin Chen wrote:
> On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote:
> > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote:
> > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote:
> > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > > >             FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> > > >             FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > > >
> > > > +   if (cmdq->type == TEGRA241_VCMDQ) {
> > > > +           cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > > > +           return;
> > > > +   }
> > > > +
> > > >     if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > > >             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > > >             return;
> > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> > > >                                     struct arm_smmu_cmdq *cmdq,
> > > >                                     struct arm_smmu_ll_queue *llq)
> > > >  {
> > > > -   if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > > > +   if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > > > +       cmdq->type != TEGRA241_VCMDQ) {
> > > >             return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> > > >
> > > > --------------------------------------------------------------
> > > >
> > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
> > > > is more general looking though..
> > >
> > > And we would need some additional lines of comments for the two
> > > pieces above, explaining why TEGRA241_VCMDQ type needs the first
> > > one while bypasses the second one. Again, it feels even worse :(
> > 
> > I hacked the code around a bit this afternoon. Please can you see if:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip
> > 
> > does roughly what you need?
> 
> I appreciate the patch. Yet, we cannot use IORT's model field.
> This would need to go through IORT documentation, for A. And B,
> we had a very long discussion with ARM (Robin was there) years
> ago, and concluded that this CMDQV would not be a model in IORT
> but a DSDT node as an extension. So, this is firm...

Seems like a bad outcome given that you've clearly modified the IP, but
whatever. We can parse the DSDT when we detect whatever the model
actually is; I don't think that's a huge issue.

> With that, we cannot avoid an unconditional hard-coding tegra
> function call even if we switch to an impl design:
> 
> +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu)
> +{
> +	/*
> +	 * unconditional go through ACPI table to detect if there is a tegra241
> +	 * implementation that extends SMMU with a CMDQV. The probe() will fill
> +	 * the smmu->impl pointer upon success. Otherwise, fall back to regular
> +	 * SMMU CMDQ.
> +	 */
> +	tegra241_impl_acpi_probe(smmu);

In-line the minimal DSDT parsing to figure out if we're on a Tegra part.
If it's that bad, put it in a static inline in arm-smmu-v3.h.

> +	return 0;
> +}
> 
> As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look
> very optimal to me.

"optimal" in what sense? In that you don't like how it smells, or that
it's measurably bad?

> But if you insist on having an smmu option, we still have to take in the
> PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call
> in the IRQ handler too.  So, it would eventually look like [attachment].

With my hacks, I think you can just call arm_smmu_cmdq_build_sync_cmd()
from the irqhandler and it will work.

Will
Will Deacon July 8, 2024, 11:31 a.m. UTC | #10
On Fri, Jul 05, 2024 at 05:32:24PM -0700, Nicolin Chen wrote:
> On Fri, Jul 05, 2024 at 11:10:47AM -0700, Nicolin Chen wrote:
> > On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote:
> > > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote:
> > > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote:
> > > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> > > > >             FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
> > > > >             FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
> > > > >
> > > > > +   if (cmdq->type == TEGRA241_VCMDQ) {
> > > > > +           cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
> > > > > +           return;
> > > > > +   }
> > > > > +
> > > > >     if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> > > > >             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> > > > >             return;
> > > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
> > > > >                                     struct arm_smmu_cmdq *cmdq,
> > > > >                                     struct arm_smmu_ll_queue *llq)
> > > > >  {
> > > > > -   if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
> > > > > +   if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
> > > > > +       cmdq->type != TEGRA241_VCMDQ) {
> > > > >             return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
> > > > >
> > > > > --------------------------------------------------------------
> > > > >
> > > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY
> > > > > is more general looking though..
> > > >
> > > > And we would need some additional lines of comments for the two
> > > > pieces above, explaining why TEGRA241_VCMDQ type needs the first
> > > > one while bypasses the second one. Again, it feels even worse :(
> > > 
> > > I hacked the code around a bit this afternoon. Please can you see if:
> > > 
> > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip
> > > 
> > > does roughly what you need?
> > 
> > I appreciate the patch. Yet, we cannot use IORT's model field.
> > This would need to go through IORT documentation, for A. And B,
> > we had a very long discussion with ARM (Robin was there) years
> > ago, and concluded that this CMDQV would not be a model in IORT
> > but a DSDT node as an extension. So, this is firm...
> > 
> > With that, we cannot avoid an unconditional hard-coding tegra
> > function call even if we switch to an impl design:
> > 
> > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu)
> > +{
> > +	/*
> > +	 * unconditional go through ACPI table to detect if there is a tegra241
> > +	 * implementation that extends SMMU with a CMDQV. The probe() will fill
> > +	 * the smmu->impl pointer upon success. Otherwise, fall back to regular
> > +	 * SMMU CMDQ.
> > +	 */
> > +	tegra241_impl_acpi_probe(smmu);
> > +	return 0;
> > +}
> > 
> > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look
> > very optimal to me. But if you insist on having an smmu option,
> > we still have to take in the PATCH-3 in this series, enforcing
> > an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too.
> > So, it would eventually look like [attachment].
> 
> Please ignore the attachment. Since we are adding arm_smmu_impl,
> I figure that we could add an arm_smmu_cmdq_impl too. There's an
> another small feature that I didn't implement in this v9, while
> being able to benefit from a cmdq impl now.
> 
> The impl can also hold a boolean busy_polling, so we won't need
> a global smmu option.

So /that/ might be overkill. Architectural queues can use polling, so I
don't mind having that option in the driver and it should keep the number
of impl hooks to a minimum.

> I will send a new version asap, though I am not sure if we can
> still make it to this cycle that we hoped for :-/

I'm in fixes-only mode at this point, especially since we've not had a
linux-next for a while.

Will
Will Deacon July 8, 2024, 11:43 a.m. UTC | #11
On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote:
> On Fri, Jul 05, 2024 at 11:10:42AM -0700, Nicolin Chen wrote:
> > But if you insist on having an smmu option, we still have to take in the
> > PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call
> > in the IRQ handler too.  So, it would eventually look like [attachment].
> 
> With my hacks, I think you can just call arm_smmu_cmdq_build_sync_cmd()
> from the irqhandler and it will work.

Hmm, actually, that will mean we end up using MSIs for the error case on
hardware which supports it, which is a strange change in behaviour.

What does your hardware do if it sees SIG_SEV in a CMD_SYNC? Is it just
a case of failing to generate the event on completion, or does it treat
it as an invalid opcode?

Will
Nicolin Chen July 8, 2024, 5:59 p.m. UTC | #12
On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote:
> > With that, we cannot avoid an unconditional hard-coding tegra
> > function call even if we switch to an impl design:
> >
> > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu)
> > +{
> > +     /*
> > +      * unconditional go through ACPI table to detect if there is a tegra241
> > +      * implementation that extends SMMU with a CMDQV. The probe() will fill
> > +      * the smmu->impl pointer upon success. Otherwise, fall back to regular
> > +      * SMMU CMDQ.
> > +      */
> > +     tegra241_impl_acpi_probe(smmu);
> 
> In-line the minimal DSDT parsing to figure out if we're on a Tegra part.
> If it's that bad, put it in a static inline in arm-smmu-v3.h.

OK. How about the following?

/* arm-smmu-v3.h */
/*
 * Probe for implementation-specific SMMU extensions described in the DSDT
 * (currently only the Tegra241 CMDQV). Thin indirection kept in the header
 * so arm-smmu-v3.c does not hard-code vendor probe calls inline.
 *
 * NOTE(review): per the surrounding discussion, the probe is expected to
 * fill smmu->impl on success and be a no-op otherwise — confirm against
 * tegra241_cmdqv_acpi_dsdt_probe().
 */
static inline void arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu,
						 struct acpi_iort_node *node)
{
	tegra241_cmdqv_acpi_dsdt_probe(smmu, node);
}

/* arm-smmu-v3.c */
static int arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu,
				    struct acpi_iort_node *node)
{
	/*
	 * The DSDT might hold an SMMU extension, so we have no option but to
	 * go through the ACPI tables unconditionally. On success this probe
	 * fills the smmu->impl pointer; otherwise we just carry on with a
	 * standard SMMU. Either way is fine, hence the unconditional 0 return.
	 */
	arm_smmu_impl_acpi_dsdt_probe(smmu, node);

	return 0;
}

> > +     return 0;
> > +}
> >
> > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look
> > very optimal to me.
> 
> "optimal" in what sense? In that you don't like how it smells, or that
> it's measurably bad?

It would potentially not work if someday an implementation has
two secondary queues? I got your point of making it an option
just like the existing ARM_SMMU_OPT_MSIPOLL though..

Thanks
Nicolin
Nicolin Chen July 8, 2024, 6:02 p.m. UTC | #13
On Mon, Jul 08, 2024 at 12:31:15PM +0100, Will Deacon wrote:
> > > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look
> > > very optimal to me. But if you insist on having an smmu option,
> > > we still have to take in the PATCH-3 in this series, enforcing
> > > an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too.
> > > So, it would eventually look like [attachment].
> >
> > Please ignore the attachment. Since we are adding arm_smmu_impl,
> > I figure that we could add an arm_smmu_cmdq_impl too. There's an
> > another small feature that I didn't implement in this v9, while
> > being able to benefit from a cmdq impl now.
> >
> > The impl can also hold a boolean busy_polling, so we won't need
> > a global smmu option.
> 
> So /that/ might be overkill. Architectural queues can use polling, so I
> don't mind having that option in the driver and it should keep the number
> of impl hooks to a minimum.

OK. Let's make an option as you suggested.

> > I will send a new version asap, though I am not sure if we can
> > still make it to this cycle that we hoped for :-/
> 
> I'm in fixes-only mode at this point, especially since we've not had a
> linux-next for a while.

Sad that we missed again. Thanks for letting me know that..

Nicolin
Nicolin Chen July 8, 2024, 6:05 p.m. UTC | #14
On Mon, Jul 08, 2024 at 12:43:26PM +0100, Will Deacon wrote:
> External email: Use caution opening links or attachments
> 
> 
> On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote:
> > On Fri, Jul 05, 2024 at 11:10:42AM -0700, Nicolin Chen wrote:
> > > But if you insist on having an smmu option, we still have to take in the
> > > PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call
> > > in the IRQ handler too.  So, it would eventually look like [attachment].
> >
> > With my hacks, I think you can just call arm_smmu_cmdq_build_sync_cmd()
> > from the irqhandler and it will work.
> 
> Hmm, actually, that will mean we end up using MSIs for the error case on
> hardware which supports it, which is a strange change in behaviour.

Yes. I highlighted the same in the commit log of PATCH-3:
   iommu/arm-smmu-v3: Enforce arm_smmu_cmdq_build_sync_cmd

Do you foresee some potential risk of doing that?

> What does your hardware do if it sees SIG_SEV in a CMD_SYNC? Is it just
> a case of failing to generate the event on completion, or does it treat
> it as an invalid opcode?

That would be an invalid opcode.

Thanks
Nicolin
Nicolin Chen July 9, 2024, 6:29 p.m. UTC | #15
Hi Will,

On Mon, Jul 08, 2024 at 11:00:00AM -0700, Nicolin Chen wrote:
> On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote:
> > > With that, we cannot avoid an unconditional hard-coding tegra
> > > function call even if we switch to an impl design:
> > >
> > > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu)
> > > +{
> > > +     /*
> > > +      * unconditional go through ACPI table to detect if there is a tegra241
> > > +      * implementation that extends SMMU with a CMDQV. The probe() will fill
> > > +      * the smmu->impl pointer upon success. Otherwise, fall back to regular
> > > +      * SMMU CMDQ.
> > > +      */
> > > +     tegra241_impl_acpi_probe(smmu);
> > 
> > In-line the minimal DSDT parsing to figure out if we're on a Tegra part.
> > If it's that bad, put it in a static inline in arm-smmu-v3.h.
> 
> OK. How about the following?
> 
> /* arm-smmu-v3.h */
> static inline void arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu,
> 						 struct acpi_iort_node *node)
> {
> 	tegra241_cmdqv_acpi_dsdt_probe(smmu, node);
> }
> 
> /* arm-smmu-v3.c */
> static int arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu,
> 				    struct acpi_iort_node *node)
> {
> 	/*
> 	 * DSDT might holds some SMMU extension, so we have no option but to go
> 	 * through ACPI tables unconditionally. This probe function should fill
> 	 * the smmu->impl pointer upon success. Otherwise, just carry on with a
> 	 * standard SMMU.
> 	 */
> 	arm_smmu_impl_acpi_dsdt_probe(smmu, node);
> 
> 	return 0;
> }

I have reworked my series and it looks like:
------------------------------------------------------------- 
@ -627,9 +630,35 @@ struct arm_smmu_strtab_cfg {
        u32                             strtab_base_cfg;
 };
 
+struct arm_smmu_impl {
+       int (*device_reset)(struct arm_smmu_device *smmu);
+       void (*device_remove)(struct arm_smmu_device *smmu);
+       struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu,
+                              u8 opcode);
+};
+
+#ifdef CONFIG_TEGRA241_CMDQV
+struct arm_smmu_device *
+tegra241_cmdqv_acpi_dsdt_probe(struct arm_smmu_device *smmu,
+                              struct acpi_iort_node *node);
+#endif
+
+static inline struct arm_smmu_device *
+arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu,
+                             struct acpi_iort_node *node)
+{
+#ifdef CONFIG_TEGRA241_CMDQV
+       smmu = tegra241_cmdqv_acpi_dsdt_probe(smmu, node);
+#endif
+       return smmu;
+}
+
 /* An SMMUv3 instance */
 struct arm_smmu_device {
        struct device                   *dev;
+       /* An SMMUv3 implementation */
+       const struct arm_smmu_impl      *impl;
+
        void __iomem                    *base;
        void __iomem                    *page1;
------------------------------------------------------------- 

One thing that I want to confirm is about the smmu pointer.
I implemented in the way that SMMUv2 driver does, i.e. the
passed-in SMMU pointer gets devm_realloc() to &cmdev->smmu.
Is it something you would prefer?

Thanks
Nicolin
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index c864c634cd23..ba0e24d5ffbf 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -345,6 +345,11 @@  static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 		 FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
 		 FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
 
+	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+		return;
+	}
+
 	if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
 		return;
@@ -690,7 +695,8 @@  static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_cmdq *cmdq,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
+	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+	    !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
 
 	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 180c0b1e0658..01227c0de290 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -543,6 +543,9 @@  struct arm_smmu_queue {
 
 	u32 __iomem			*prod_reg;
 	u32 __iomem			*cons_reg;
+
+#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY	BIT(0)	/* CMD_SYNC CS field supports CS_NONE only */
+	u32				quirks;
 };
 
 struct arm_smmu_queue_poll {